upgini 1.1.275a1__py3-none-any.whl → 1.1.276__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/ads.py +6 -2
- upgini/autofe/date.py +9 -2
- upgini/data_source/data_source_publisher.py +1 -1
- upgini/dataset.py +6 -13
- upgini/features_enricher.py +156 -220
- upgini/metadata.py +1 -9
- upgini/metrics.py +12 -0
- upgini/normalizer/phone_normalizer.py +2 -2
- upgini/resource_bundle/strings.properties +2 -2
- upgini/utils/__init__.py +3 -2
- upgini/utils/base_search_key_detector.py +12 -14
- upgini/utils/country_utils.py +2 -2
- upgini/utils/datetime_utils.py +7 -4
- upgini/utils/deduplicate_utils.py +1 -11
- upgini/utils/email_utils.py +2 -7
- upgini/utils/features_validator.py +2 -1
- upgini/utils/target_utils.py +1 -1
- upgini/utils/track_info.py +25 -13
- {upgini-1.1.275a1.dist-info → upgini-1.1.276.dist-info}/METADATA +2 -2
- {upgini-1.1.275a1.dist-info → upgini-1.1.276.dist-info}/RECORD +23 -23
- {upgini-1.1.275a1.dist-info → upgini-1.1.276.dist-info}/LICENSE +0 -0
- {upgini-1.1.275a1.dist-info → upgini-1.1.276.dist-info}/WHEEL +0 -0
- {upgini-1.1.275a1.dist-info → upgini-1.1.276.dist-info}/top_level.txt +0 -0
upgini/metadata.py
CHANGED
|
@@ -4,8 +4,6 @@ from typing import Dict, List, Optional, Set
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
6
|
SYSTEM_RECORD_ID = "system_record_id"
|
|
7
|
-
ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
|
|
8
|
-
SEARCH_KEY_UNNEST = "search_key_unnest"
|
|
9
7
|
SORT_ID = "sort_id"
|
|
10
8
|
EVAL_SET_INDEX = "eval_set_index"
|
|
11
9
|
TARGET = "target"
|
|
@@ -13,7 +11,7 @@ COUNTRY = "country_iso_code"
|
|
|
13
11
|
RENAMED_INDEX = "index_col"
|
|
14
12
|
DEFAULT_INDEX = "index"
|
|
15
13
|
ORIGINAL_INDEX = "original_index"
|
|
16
|
-
SYSTEM_COLUMNS = {SYSTEM_RECORD_ID,
|
|
14
|
+
SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY, SORT_ID}
|
|
17
15
|
|
|
18
16
|
|
|
19
17
|
class FileColumnMeaningType(Enum):
|
|
@@ -39,8 +37,6 @@ class FileColumnMeaningType(Enum):
|
|
|
39
37
|
POSTAL_CODE = "POSTAL_CODE"
|
|
40
38
|
SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
|
|
41
39
|
EVAL_SET_INDEX = "EVAL_SET_INDEX"
|
|
42
|
-
ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
|
|
43
|
-
UNNEST_KEY = "UNNEST_KEY"
|
|
44
40
|
|
|
45
41
|
|
|
46
42
|
class SearchKey(Enum):
|
|
@@ -186,10 +182,6 @@ class FileColumnMetadata(BaseModel):
|
|
|
186
182
|
meaningType: FileColumnMeaningType
|
|
187
183
|
minMaxValues: Optional[NumericInterval] = None
|
|
188
184
|
originalName: Optional[str]
|
|
189
|
-
# is this column contains keys from multiple key columns like msisdn1, msisdn2
|
|
190
|
-
isUnnest: bool = False,
|
|
191
|
-
# list of original etalon key column names like msisdn1, msisdn2
|
|
192
|
-
unnestKeyNames: Optional[list[str]]
|
|
193
185
|
|
|
194
186
|
|
|
195
187
|
class FileMetadata(BaseModel):
|
upgini/metrics.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import inspect
|
|
1
2
|
import logging
|
|
2
3
|
import re
|
|
3
4
|
from copy import deepcopy
|
|
@@ -381,6 +382,11 @@ class EstimatorWrapper:
|
|
|
381
382
|
kwargs["estimator"] = estimator_copy
|
|
382
383
|
if isinstance(estimator, CatBoostClassifier) or isinstance(estimator, CatBoostRegressor):
|
|
383
384
|
if cat_features is not None:
|
|
385
|
+
for cat_feature in cat_features:
|
|
386
|
+
if cat_feature not in X.columns:
|
|
387
|
+
logger.error(
|
|
388
|
+
f"Client cat_feature `{cat_feature}` not found in X columns: {X.columns.to_list()}"
|
|
389
|
+
)
|
|
384
390
|
estimator_copy.set_params(
|
|
385
391
|
cat_features=[X.columns.get_loc(cat_feature) for cat_feature in cat_features]
|
|
386
392
|
)
|
|
@@ -647,6 +653,12 @@ class OtherEstimatorWrapper(EstimatorWrapper):
|
|
|
647
653
|
def validate_scoring_argument(scoring: Union[Callable, str, None]):
|
|
648
654
|
if isinstance(scoring, str) and scoring is not None:
|
|
649
655
|
_get_scorer_by_name(scoring)
|
|
656
|
+
elif isinstance(scoring, Callable):
|
|
657
|
+
spec = inspect.getfullargspec(scoring)
|
|
658
|
+
if len(spec.args) < 3:
|
|
659
|
+
raise ValidationError(
|
|
660
|
+
f"Invalid scoring function passed {scoring}. It should accept 3 input arguments: estimator, X, y"
|
|
661
|
+
)
|
|
650
662
|
|
|
651
663
|
|
|
652
664
|
def _get_scorer_by_name(scoring: str) -> Tuple[Callable, str, int]:
|
upgini/normalizer/phone_normalizer.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
|
-
from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype
|
|
4
|
+
from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype, is_object_dtype
|
|
5
5
|
|
|
6
6
|
from upgini.errors import ValidationError
|
|
7
7
|
|
|
@@ -44,7 +44,7 @@ class PhoneNormalizer:
|
|
|
44
44
|
Method will remove all non numeric chars from string and convert it to int.
|
|
45
45
|
None will be set for phone numbers that couldn"t be converted to int
|
|
46
46
|
"""
|
|
47
|
-
if is_string_dtype(self.df[self.phone_column_name]):
|
|
47
|
+
if is_string_dtype(self.df[self.phone_column_name]) or is_object_dtype(self.df[self.phone_column_name]):
|
|
48
48
|
convert_func = self.phone_str_to_int_safe
|
|
49
49
|
elif is_float_dtype(self.df[self.phone_column_name]):
|
|
50
50
|
convert_func = self.phone_float_to_int_safe
|
upgini/resource_bundle/strings.properties
CHANGED
|
@@ -38,6 +38,7 @@ loss_selection_warn=\nWARNING: Loss `{0}` is not supported for feature selection
|
|
|
38
38
|
loss_calc_metrics_warn=\nWARNING: Loss `{0}` is not supported for metrics calculation with {1}
|
|
39
39
|
multivariate_timeseries_detected=\nWARNING: Multivariate TimeSeries detected. Blocked time series cross-validation split selected.\nMore details: https://github.com/upgini/upgini#-time-series-prediction-support
|
|
40
40
|
group_k_fold_in_classification=\nWARNING: Using group K-fold cross-validation split for classification task.
|
|
41
|
+
current_date_added=\nWARNING: No date/datetime column was detected in X to be used as a search key. The current date will be used to match the latest version of data sources
|
|
41
42
|
|
|
42
43
|
# Errors
|
|
43
44
|
failed_search_by_task_id=Failed to retrieve the specified search results
|
|
@@ -87,7 +88,6 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
|
|
|
87
88
|
search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
|
|
88
89
|
empty_search_key=Search key {} is empty. Please fill values or remove this search key
|
|
89
90
|
single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
|
|
90
|
-
unsupported_multi_key=Search key {} cannot be used multiple times
|
|
91
91
|
unsupported_index_column=\nWARNING: Your column with name `index` was dropped because it's reserved name is booked for system needs.
|
|
92
92
|
date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
|
|
93
93
|
invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
|
|
@@ -159,7 +159,7 @@ dataset_invalid_multiclass_target=Unexpected dtype of target for multiclass task
|
|
|
159
159
|
dataset_invalid_regression_target=Unexpected dtype of target for regression task type: {}. Expected float
|
|
160
160
|
dataset_invalid_timeseries_target=Unexpected dtype of target for timeseries task type: {}. Expected float
|
|
161
161
|
dataset_to_many_multiclass_targets=The number of target classes {} exceeds the allowed threshold: {}. Please, correct your data and try again
|
|
162
|
-
dataset_rarest_class_less_min=
|
|
162
|
+
dataset_rarest_class_less_min=Count of rows with the rarest class `{}` is {}, minimum count must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
|
|
163
163
|
dataset_rarest_class_less_threshold=\nWARNING: Target is imbalanced and will be undersampled to the rarest class. Frequency of the rarest class `{}` is {}\nMinimum number of observations for each class to avoid undersampling {} ({}%)
|
|
164
164
|
dataset_date_features=\nWARNING: Columns {} is a datetime or period type but not used as a search key, removed from X
|
|
165
165
|
dataset_too_many_features=Too many features. Maximum number of features is {}
|
upgini/utils/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@ import itertools
|
|
|
2
2
|
from typing import List, Tuple
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
|
-
from pandas.api.types import is_string_dtype
|
|
5
|
+
from pandas.api.types import is_string_dtype, is_object_dtype
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
|
|
@@ -20,5 +20,6 @@ def find_numbers_with_decimal_comma(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
20
20
|
return [
|
|
21
21
|
col
|
|
22
22
|
for col in tmp.columns
|
|
23
|
-
if is_string_dtype(tmp[col])
|
|
23
|
+
if (is_string_dtype(tmp[col]) or is_object_dtype(tmp[col]))
|
|
24
|
+
and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
|
|
24
25
|
]
|
upgini/utils/base_search_key_detector.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from typing import List, Optional
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
@@ -10,18 +10,16 @@ class BaseSearchKeyDetector:
|
|
|
10
10
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
11
11
|
raise NotImplementedError()
|
|
12
12
|
|
|
13
|
-
def
|
|
14
|
-
|
|
15
|
-
column_name
|
|
16
|
-
|
|
17
|
-
if self._is_search_key_by_name(column_name)
|
|
18
|
-
]
|
|
13
|
+
def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
|
|
14
|
+
for column_name in column_names:
|
|
15
|
+
if self._is_search_key_by_name(column_name):
|
|
16
|
+
return column_name
|
|
19
17
|
|
|
20
|
-
def
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
18
|
+
def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
|
|
19
|
+
maybe_column = self._get_search_key_by_name(df.columns.to_list())
|
|
20
|
+
if maybe_column is not None:
|
|
21
|
+
return maybe_column
|
|
22
|
+
|
|
23
|
+
for column_name in df.columns:
|
|
25
24
|
if self._is_search_key_by_values(df[column_name]):
|
|
26
|
-
|
|
27
|
-
return list(set(columns_by_names + columns_by_values))
|
|
25
|
+
return column_name
|
upgini/utils/country_utils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
-
from pandas.api.types import is_string_dtype
|
|
2
|
+
from pandas.api.types import is_string_dtype, is_object_dtype
|
|
3
3
|
|
|
4
4
|
from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
|
|
5
5
|
|
|
@@ -9,7 +9,7 @@ class CountrySearchKeyDetector(BaseSearchKeyDetector):
|
|
|
9
9
|
return "country" in str(column_name).lower()
|
|
10
10
|
|
|
11
11
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
12
|
-
if not is_string_dtype(column):
|
|
12
|
+
if not is_string_dtype(column) and not is_object_dtype(column):
|
|
13
13
|
return False
|
|
14
14
|
|
|
15
15
|
all_count = len(column)
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -6,7 +6,10 @@ from typing import Dict, List, Optional
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from dateutil.relativedelta import relativedelta
|
|
9
|
-
from pandas.api.types import
|
|
9
|
+
from pandas.api.types import (
|
|
10
|
+
is_numeric_dtype,
|
|
11
|
+
is_period_dtype,
|
|
12
|
+
)
|
|
10
13
|
|
|
11
14
|
from upgini.errors import ValidationError
|
|
12
15
|
from upgini.metadata import SearchKey
|
|
@@ -78,9 +81,6 @@ class DateTimeSearchKeyConverter:
|
|
|
78
81
|
df[self.date_column] = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
|
|
79
82
|
elif isinstance(df[self.date_column].values[0], datetime.date):
|
|
80
83
|
df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
|
|
81
|
-
elif is_string_dtype(df[self.date_column]):
|
|
82
|
-
df[self.date_column] = df[self.date_column].apply(self.clean_date)
|
|
83
|
-
df[self.date_column] = self.parse_date(df)
|
|
84
84
|
elif is_period_dtype(df[self.date_column]):
|
|
85
85
|
df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
|
|
86
86
|
elif is_numeric_dtype(df[self.date_column]):
|
|
@@ -100,6 +100,9 @@ class DateTimeSearchKeyConverter:
|
|
|
100
100
|
msg = self.bundle.get("unsupported_date_type").format(self.date_column)
|
|
101
101
|
self.logger.warning(msg)
|
|
102
102
|
raise ValidationError(msg)
|
|
103
|
+
else:
|
|
104
|
+
df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
|
|
105
|
+
df[self.date_column] = self.parse_date(df)
|
|
103
106
|
|
|
104
107
|
# If column with date is datetime then extract seconds of the day and minute of the hour
|
|
105
108
|
# as additional features
|
upgini/utils/deduplicate_utils.py
CHANGED
|
@@ -3,15 +3,7 @@ from typing import Dict, List, Optional, Union
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
-
from upgini.metadata import (
|
|
7
|
-
ENTITY_SYSTEM_RECORD_ID,
|
|
8
|
-
EVAL_SET_INDEX,
|
|
9
|
-
SORT_ID,
|
|
10
|
-
SYSTEM_RECORD_ID,
|
|
11
|
-
TARGET,
|
|
12
|
-
ModelTaskType,
|
|
13
|
-
SearchKey,
|
|
14
|
-
)
|
|
6
|
+
from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
|
|
15
7
|
from upgini.resource_bundle import ResourceBundle
|
|
16
8
|
from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
|
|
17
9
|
from upgini.utils.target_utils import define_task
|
|
@@ -151,8 +143,6 @@ def clean_full_duplicates(
|
|
|
151
143
|
unique_columns = df.columns.tolist()
|
|
152
144
|
if SYSTEM_RECORD_ID in unique_columns:
|
|
153
145
|
unique_columns.remove(SYSTEM_RECORD_ID)
|
|
154
|
-
if ENTITY_SYSTEM_RECORD_ID in unique_columns:
|
|
155
|
-
unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
|
|
156
146
|
if SORT_ID in unique_columns:
|
|
157
147
|
unique_columns.remove(SORT_ID)
|
|
158
148
|
if EVAL_SET_INDEX in unique_columns:
|
upgini/utils/email_utils.py
CHANGED
|
@@ -4,7 +4,7 @@ from hashlib import sha256
|
|
|
4
4
|
from typing import Dict, List, Optional
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
|
-
from pandas.api.types import is_string_dtype
|
|
7
|
+
from pandas.api.types import is_string_dtype, is_object_dtype
|
|
8
8
|
from upgini.resource_bundle import bundle
|
|
9
9
|
|
|
10
10
|
from upgini.metadata import SearchKey
|
|
@@ -18,7 +18,7 @@ class EmailSearchKeyDetector(BaseSearchKeyDetector):
|
|
|
18
18
|
return str(column_name).lower() in ["email", "e_mail", "e-mail"]
|
|
19
19
|
|
|
20
20
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
21
|
-
if not is_string_dtype(column):
|
|
21
|
+
if not is_string_dtype(column) and not is_object_dtype:
|
|
22
22
|
return False
|
|
23
23
|
if not column.astype("string").str.contains("@").any():
|
|
24
24
|
return False
|
|
@@ -38,13 +38,11 @@ class EmailSearchKeyConverter:
|
|
|
38
38
|
email_column: str,
|
|
39
39
|
hem_column: Optional[str],
|
|
40
40
|
search_keys: Dict[str, SearchKey],
|
|
41
|
-
unnest_search_keys: Optional[List[str]] = None,
|
|
42
41
|
logger: Optional[logging.Logger] = None,
|
|
43
42
|
):
|
|
44
43
|
self.email_column = email_column
|
|
45
44
|
self.hem_column = hem_column
|
|
46
45
|
self.search_keys = search_keys
|
|
47
|
-
self.unnest_search_keys = unnest_search_keys
|
|
48
46
|
if logger is not None:
|
|
49
47
|
self.logger = logger
|
|
50
48
|
else:
|
|
@@ -82,12 +80,9 @@ class EmailSearchKeyConverter:
|
|
|
82
80
|
del self.search_keys[self.email_column]
|
|
83
81
|
return df
|
|
84
82
|
self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
|
|
85
|
-
self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
|
|
86
83
|
self.email_converted_to_hem = True
|
|
87
84
|
|
|
88
85
|
del self.search_keys[self.email_column]
|
|
89
|
-
if self.email_column in self.unnest_search_keys:
|
|
90
|
-
self.unnest_search_keys.remove(self.email_column)
|
|
91
86
|
|
|
92
87
|
df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
|
|
93
88
|
|
upgini/utils/features_validator.py
CHANGED
|
@@ -81,7 +81,8 @@ class FeaturesValidator:
|
|
|
81
81
|
return [
|
|
82
82
|
i
|
|
83
83
|
for i in df
|
|
84
|
-
if (
|
|
84
|
+
if (is_object_dtype(df[i]) or is_string_dtype(df[i]) or is_integer_dtype(df[i]))
|
|
85
|
+
and (df[i].nunique(dropna=False) / row_count >= 0.85)
|
|
85
86
|
]
|
|
86
87
|
|
|
87
88
|
@staticmethod
|
upgini/utils/target_utils.py
CHANGED
|
@@ -107,7 +107,7 @@ def balance_undersample(
|
|
|
107
107
|
min_class_count = vc[min_class_value]
|
|
108
108
|
|
|
109
109
|
min_class_percent = imbalance_threshold / target_classes_count
|
|
110
|
-
min_class_threshold = min_class_percent * count
|
|
110
|
+
min_class_threshold = int(min_class_percent * count)
|
|
111
111
|
|
|
112
112
|
resampled_data = df
|
|
113
113
|
df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
|
upgini/utils/track_info.py
CHANGED
|
@@ -55,7 +55,7 @@ def _get_execution_ide() -> str:
|
|
|
55
55
|
def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optional[str] = None) -> dict:
|
|
56
56
|
# default values
|
|
57
57
|
track = {"ide": _get_execution_ide()}
|
|
58
|
-
ident_res = "https://
|
|
58
|
+
ident_res = "https://api64.ipify.org"
|
|
59
59
|
|
|
60
60
|
try:
|
|
61
61
|
track["hostname"] = socket.gethostname()
|
|
@@ -74,17 +74,20 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
|
|
|
74
74
|
display(
|
|
75
75
|
Javascript(
|
|
76
76
|
"""
|
|
77
|
-
|
|
77
|
+
async function getVisitorId() {
|
|
78
|
+
return import('https://upgini.github.io/upgini/js/a.js')
|
|
78
79
|
.then(FingerprintJS => FingerprintJS.load())
|
|
79
80
|
.then(fp => fp.get())
|
|
80
|
-
.then(result =>
|
|
81
|
+
.then(result => result.visitorId);
|
|
82
|
+
}
|
|
81
83
|
"""
|
|
82
84
|
)
|
|
83
85
|
)
|
|
84
|
-
track["visitorId"] = output.eval_js("
|
|
86
|
+
track["visitorId"] = output.eval_js("getVisitorId()", timeout_sec=30)
|
|
85
87
|
except Exception as e:
|
|
86
88
|
track["err"] = str(e)
|
|
87
|
-
|
|
89
|
+
if "visitorId" not in track:
|
|
90
|
+
track["visitorId"] = "None"
|
|
88
91
|
if client_ip:
|
|
89
92
|
track["ip"] = client_ip
|
|
90
93
|
else:
|
|
@@ -95,16 +98,19 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
|
|
|
95
98
|
display(
|
|
96
99
|
Javascript(
|
|
97
100
|
f"""
|
|
98
|
-
|
|
101
|
+
async function getIP() {{
|
|
102
|
+
return fetch("{ident_res}")
|
|
99
103
|
.then(response => response.text())
|
|
100
|
-
.then(data =>
|
|
104
|
+
.then(data => data);
|
|
105
|
+
}}
|
|
101
106
|
"""
|
|
102
107
|
)
|
|
103
108
|
)
|
|
104
|
-
track["ip"] = output.eval_js("
|
|
109
|
+
track["ip"] = output.eval_js("getIP()", timeout_sec=10)
|
|
105
110
|
except Exception as e:
|
|
106
111
|
track["err"] = str(e)
|
|
107
|
-
|
|
112
|
+
if "ip" not in track:
|
|
113
|
+
track["ip"] = "0.0.0.0"
|
|
108
114
|
|
|
109
115
|
elif track["ide"] == "binder":
|
|
110
116
|
try:
|
|
@@ -116,8 +122,10 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
|
|
|
116
122
|
track["visitorId"] = sha256(os.environ["CLIENT_IP"].encode()).hexdigest()
|
|
117
123
|
except Exception as e:
|
|
118
124
|
track["err"] = str(e)
|
|
119
|
-
|
|
120
|
-
|
|
125
|
+
if "ip" not in track:
|
|
126
|
+
track["ip"] = "0.0.0.0"
|
|
127
|
+
if "visitorId" not in track:
|
|
128
|
+
track["visitorId"] = "None"
|
|
121
129
|
|
|
122
130
|
elif track["ide"] == "kaggle":
|
|
123
131
|
try:
|
|
@@ -136,8 +144,8 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
|
|
|
136
144
|
raise Exception(err)
|
|
137
145
|
except Exception as e:
|
|
138
146
|
track["err"] = str(e)
|
|
139
|
-
|
|
140
|
-
|
|
147
|
+
if "visitorId" not in track:
|
|
148
|
+
track["visitorId"] = "None"
|
|
141
149
|
else:
|
|
142
150
|
try:
|
|
143
151
|
if client_ip:
|
|
@@ -150,5 +158,9 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
|
|
|
150
158
|
track["visitorId"] = sha256(str(getnode()).encode()).hexdigest()
|
|
151
159
|
except Exception as e:
|
|
152
160
|
track["err"] = str(e)
|
|
161
|
+
if "visitorId" not in track:
|
|
162
|
+
track["visitorId"] = "None"
|
|
163
|
+
if "ip" not in track:
|
|
164
|
+
track["ip"] = "0.0.0.0"
|
|
153
165
|
|
|
154
166
|
return track
|
{upgini-1.1.275a1.dist-info → upgini-1.1.276.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.275a1
|
|
3
|
+
Version: 1.1.276
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Home-page: https://upgini.com/
|
|
6
6
|
Author: Upgini Developers
|
|
@@ -28,7 +28,7 @@ Description-Content-Type: text/markdown
|
|
|
28
28
|
License-File: LICENSE
|
|
29
29
|
Requires-Dist: python-dateutil >=2.8.0
|
|
30
30
|
Requires-Dist: requests >=2.8.0
|
|
31
|
-
Requires-Dist: pandas <
|
|
31
|
+
Requires-Dist: pandas <3.0.0,>=1.1.0
|
|
32
32
|
Requires-Dist: numpy >=1.19.0
|
|
33
33
|
Requires-Dist: scikit-learn >=1.3.0
|
|
34
34
|
Requires-Dist: pydantic <2.0.0,>=1.8.2
|
{upgini-1.1.275a1.dist-info → upgini-1.1.276.dist-info}/RECORD
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
2
|
-
upgini/ads.py,sha256=
|
|
3
|
-
upgini/dataset.py,sha256=
|
|
2
|
+
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
3
|
+
upgini/dataset.py,sha256=HwL2syoMf3F9k9SmsJJMhhqnAddZcx28RZ1aYam7Lhs,45665
|
|
4
4
|
upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
|
|
5
|
-
upgini/features_enricher.py,sha256=
|
|
5
|
+
upgini/features_enricher.py,sha256=ys7RQoZsyY8-NkUZyp12K8z5aQmg7pyx0LtwclFtXkc,176358
|
|
6
6
|
upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
|
|
7
7
|
upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
|
|
8
|
-
upgini/metadata.py,sha256=
|
|
9
|
-
upgini/metrics.py,sha256=
|
|
8
|
+
upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
|
|
9
|
+
upgini/metrics.py,sha256=tGzdn0jgup86OlH_GS4eoza8ZJZ9wgaJr7SaX3Upwzo,29652
|
|
10
10
|
upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
|
|
11
11
|
upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
|
|
12
12
|
upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
|
|
@@ -15,49 +15,49 @@ upgini/ads_management/ads_manager.py,sha256=fP4Yqx3h2Snw5X335TbXEwFoupq1RYsE7y0P
|
|
|
15
15
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
upgini/autofe/all_operands.py,sha256=H66wqVLD-H9k8A4-q2wslhV9QaNxlb49f8YiT0Xfkps,2356
|
|
17
17
|
upgini/autofe/binary.py,sha256=f8LQqZi9zyaMUAv-jASMmWNA_vT05ncYCjZq0qx3USs,3972
|
|
18
|
-
upgini/autofe/date.py,sha256=
|
|
18
|
+
upgini/autofe/date.py,sha256=408p8P2OTPM2D3LsEGGtaiCepKGgM1BbOCQNRzAmI6c,4223
|
|
19
19
|
upgini/autofe/feature.py,sha256=2FQRGtIumNz60hFAjfLReaY18SI7HxzYZOoC5avzSjQ,11847
|
|
20
20
|
upgini/autofe/groupby.py,sha256=iXRfOmOc84ooSzRhsh9GmmG7rTafX0-ekXko8s9Qs68,3089
|
|
21
21
|
upgini/autofe/operand.py,sha256=dhtToPDGWtP_0u_RjayUpezJJZAgq_TzNbPH0bI9OXI,2805
|
|
22
22
|
upgini/autofe/unary.py,sha256=YRTzQLttbDdOnkogWBPnBexpu7uHWSLSFAxSCu3iFdY,3145
|
|
23
23
|
upgini/autofe/vector.py,sha256=5qhI_bdwaWM1l7fgCkx1tMt9R9gxWzoYCl-7WO4KiOs,604
|
|
24
24
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
upgini/data_source/data_source_publisher.py,sha256=
|
|
25
|
+
upgini/data_source/data_source_publisher.py,sha256=taRzyGgrPrTTSGw4Y-Ca5k4bf30aiTa68rxqT9zfqeI,16478
|
|
26
26
|
upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
|
|
27
27
|
upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
|
|
28
28
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
upgini/normalizer/phone_normalizer.py,sha256=
|
|
29
|
+
upgini/normalizer/phone_normalizer.py,sha256=_SYMX4GTgwzRXArK54Jp3vUBE5d4jZxSVyze-0tqzg0,9996
|
|
30
30
|
upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
|
|
31
31
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
32
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
32
|
+
upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
|
|
33
33
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
34
34
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
35
|
upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
|
|
36
36
|
upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
|
|
37
37
|
upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
|
|
38
|
-
upgini/utils/__init__.py,sha256=
|
|
39
|
-
upgini/utils/base_search_key_detector.py,sha256=
|
|
38
|
+
upgini/utils/__init__.py,sha256=YVum3lRKpyfqoJy_7HJyU6SmIgbmG8QLkHIpibE_ud8,842
|
|
39
|
+
upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
|
|
40
40
|
upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
|
|
41
|
-
upgini/utils/country_utils.py,sha256=
|
|
41
|
+
upgini/utils/country_utils.py,sha256=pV8TBURthYqwSOfH1lxfYc2blm3OvfLFCMvRv8rKTp4,6511
|
|
42
42
|
upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
|
|
43
43
|
upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
|
|
44
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
45
|
-
upgini/utils/deduplicate_utils.py,sha256=
|
|
44
|
+
upgini/utils/datetime_utils.py,sha256=_mfhWb5ogEThvanQ-py1Lb6VvUvF2vT20tQgNprNz6o,10321
|
|
45
|
+
upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
|
|
46
46
|
upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
|
|
47
|
-
upgini/utils/email_utils.py,sha256=
|
|
47
|
+
upgini/utils/email_utils.py,sha256=R9bVOfbS-oVkA8PdwZfQBxm7B4mQlRtkwqx2cf6zPCY,3520
|
|
48
48
|
upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
|
|
49
|
-
upgini/utils/features_validator.py,sha256=
|
|
49
|
+
upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
|
|
50
50
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
51
51
|
upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
|
|
52
52
|
upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
|
|
53
53
|
upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
|
|
54
54
|
upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
|
|
55
55
|
upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,44014
|
|
56
|
-
upgini/utils/target_utils.py,sha256=
|
|
57
|
-
upgini/utils/track_info.py,sha256=
|
|
56
|
+
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
57
|
+
upgini/utils/track_info.py,sha256=p8gmuHhLamZF5JG7K9DeK-PcytQhlFCR29lyRr-wq_U,5665
|
|
58
58
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
63
|
-
upgini-1.1.
|
|
59
|
+
upgini-1.1.276.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
60
|
+
upgini-1.1.276.dist-info/METADATA,sha256=Dgb4UJ82UknhtKS9DHiGRu-a9i3LeoKZiVWpCzkJfF4,48156
|
|
61
|
+
upgini-1.1.276.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
62
|
+
upgini-1.1.276.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
|
|
63
|
+
upgini-1.1.276.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|