upgini 1.1.278a1__py3-none-any.whl → 1.1.279__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -0
- upgini/ads_management/ads_manager.py +4 -2
- upgini/autofe/all_operands.py +3 -2
- upgini/autofe/binary.py +2 -1
- upgini/autofe/date.py +2 -1
- upgini/autofe/feature.py +1 -1
- upgini/autofe/groupby.py +3 -1
- upgini/autofe/operand.py +4 -3
- upgini/autofe/unary.py +2 -1
- upgini/autofe/vector.py +2 -0
- upgini/dataset.py +6 -15
- upgini/errors.py +1 -1
- upgini/features_enricher.py +102 -214
- upgini/http.py +11 -10
- upgini/mdc/__init__.py +1 -3
- upgini/mdc/context.py +4 -6
- upgini/metadata.py +5 -10
- upgini/metrics.py +102 -100
- upgini/normalizer/phone_normalizer.py +1 -1
- upgini/resource_bundle/__init__.py +5 -5
- upgini/resource_bundle/strings.properties +0 -1
- upgini/sampler/base.py +1 -4
- upgini/sampler/random_under_sampler.py +2 -5
- upgini/search_task.py +4 -4
- upgini/spinner.py +1 -1
- upgini/utils/__init__.py +1 -1
- upgini/utils/base_search_key_detector.py +14 -16
- upgini/utils/blocked_time_series.py +4 -2
- upgini/utils/country_utils.py +1 -1
- upgini/utils/custom_loss_utils.py +3 -2
- upgini/utils/cv_utils.py +2 -2
- upgini/utils/datetime_utils.py +20 -15
- upgini/utils/deduplicate_utils.py +1 -11
- upgini/utils/email_utils.py +2 -7
- upgini/utils/fallback_progress_bar.py +1 -1
- upgini/utils/progress_bar.py +1 -1
- upgini/utils/sklearn_ext.py +14 -13
- upgini/utils/track_info.py +2 -2
- upgini/version_validator.py +2 -2
- {upgini-1.1.278a1.dist-info → upgini-1.1.279.dist-info}/METADATA +21 -23
- upgini-1.1.279.dist-info/RECORD +62 -0
- {upgini-1.1.278a1.dist-info → upgini-1.1.279.dist-info}/WHEEL +1 -2
- upgini/fingerprint.js +0 -8
- upgini-1.1.278a1.dist-info/RECORD +0 -63
- upgini-1.1.278a1.dist-info/top_level.txt +0 -1
- {upgini-1.1.278a1.dist-info → upgini-1.1.279.dist-info/licenses}/LICENSE +0 -0
upgini/search_task.py
CHANGED
|
@@ -8,10 +8,10 @@ import pandas as pd
|
|
|
8
8
|
|
|
9
9
|
from upgini import dataset
|
|
10
10
|
from upgini.http import (
|
|
11
|
-
_RestClient,
|
|
12
11
|
ProviderTaskSummary,
|
|
13
12
|
SearchProgress,
|
|
14
13
|
SearchTaskSummary,
|
|
14
|
+
_RestClient,
|
|
15
15
|
get_rest_client,
|
|
16
16
|
is_demo_api_key,
|
|
17
17
|
)
|
|
@@ -295,7 +295,7 @@ class SearchTask:
|
|
|
295
295
|
return self.rest_client.get_search_file_metadata(self.search_task_id, trace_id)
|
|
296
296
|
|
|
297
297
|
|
|
298
|
-
@lru_cache
|
|
298
|
+
@lru_cache
|
|
299
299
|
def _get_all_initial_raw_features_cached(
|
|
300
300
|
endpoint: Optional[str],
|
|
301
301
|
api_key: Optional[str],
|
|
@@ -328,7 +328,7 @@ def _get_all_initial_raw_features_cached(
|
|
|
328
328
|
return result_df
|
|
329
329
|
|
|
330
330
|
|
|
331
|
-
@lru_cache
|
|
331
|
+
@lru_cache
|
|
332
332
|
def _get_all_validation_raw_features_cached(
|
|
333
333
|
endpoint: Optional[str],
|
|
334
334
|
api_key: Optional[str],
|
|
@@ -357,7 +357,7 @@ def _get_all_validation_raw_features_cached(
|
|
|
357
357
|
return result_df
|
|
358
358
|
|
|
359
359
|
|
|
360
|
-
@lru_cache
|
|
360
|
+
@lru_cache
|
|
361
361
|
def _get_target_outliers_cached(
|
|
362
362
|
endpoint: Optional[str],
|
|
363
363
|
api_key: Optional[str],
|
upgini/spinner.py
CHANGED
upgini/utils/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@ import itertools
|
|
|
2
2
|
from typing import List, Tuple
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
|
-
from pandas.api.types import
|
|
5
|
+
from pandas.api.types import is_object_dtype, is_string_dtype
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
|
upgini/utils/base_search_key_detector.py
CHANGED
|
@@ -1,27 +1,25 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from typing import List, Optional
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class BaseSearchKeyDetector:
|
|
7
7
|
def _is_search_key_by_name(self, column_name: str) -> bool:
|
|
8
|
-
raise NotImplementedError
|
|
8
|
+
raise NotImplementedError
|
|
9
9
|
|
|
10
10
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
11
|
-
raise NotImplementedError
|
|
11
|
+
raise NotImplementedError
|
|
12
12
|
|
|
13
|
-
def
|
|
14
|
-
|
|
15
|
-
column_name
|
|
16
|
-
|
|
17
|
-
if self._is_search_key_by_name(column_name)
|
|
18
|
-
]
|
|
13
|
+
def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
|
|
14
|
+
for column_name in column_names:
|
|
15
|
+
if self._is_search_key_by_name(column_name):
|
|
16
|
+
return column_name
|
|
19
17
|
|
|
20
|
-
def
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
18
|
+
def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
|
|
19
|
+
maybe_column = self._get_search_key_by_name(df.columns.to_list())
|
|
20
|
+
if maybe_column is not None:
|
|
21
|
+
return maybe_column
|
|
22
|
+
|
|
23
|
+
for column_name in df.columns:
|
|
25
24
|
if self._is_search_key_by_values(df[column_name]):
|
|
26
|
-
|
|
27
|
-
return list(set(columns_by_names + columns_by_values))
|
|
25
|
+
return column_name
|
upgini/utils/blocked_time_series.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import numpy as np
|
|
2
1
|
import numbers
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from sklearn.model_selection import BaseCrossValidator
|
|
3
5
|
from sklearn.utils import indexable
|
|
4
6
|
from sklearn.utils.validation import _num_samples
|
|
5
|
-
|
|
7
|
+
|
|
6
8
|
from upgini.resource_bundle import bundle
|
|
7
9
|
|
|
8
10
|
|
upgini/utils/country_utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
from upgini.metadata import ModelTaskType, RuntimeParameters
|
|
2
|
-
from typing import Optional, Dict, Any
|
|
3
1
|
import logging
|
|
2
|
+
from typing import Any, Dict, Optional
|
|
3
|
+
|
|
4
|
+
from upgini.metadata import ModelTaskType, RuntimeParameters
|
|
4
5
|
from upgini.resource_bundle import bundle
|
|
5
6
|
|
|
6
7
|
|
upgini/utils/cv_utils.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from functools import reduce
|
|
2
2
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
3
|
-
import numpy as np
|
|
4
3
|
|
|
4
|
+
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
6
|
-
from sklearn.model_selection import BaseCrossValidator,
|
|
6
|
+
from sklearn.model_selection import BaseCrossValidator, GroupKFold, GroupShuffleSplit, KFold, TimeSeriesSplit
|
|
7
7
|
|
|
8
8
|
from upgini.metadata import CVType
|
|
9
9
|
from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -208,18 +208,17 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
|
|
|
208
208
|
if nunique_dates / days_delta < 0.3:
|
|
209
209
|
return False
|
|
210
210
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
def is_multiple_rows(group):
|
|
211
|
+
accumulated_changing_columns = set()
|
|
212
|
+
|
|
213
|
+
def check_differences(group: pd.DataFrame):
|
|
214
|
+
changing_columns = group.columns[group.nunique(dropna=False) > 1].to_list()
|
|
215
|
+
accumulated_changing_columns.update(changing_columns)
|
|
216
|
+
|
|
217
|
+
def is_multiple_rows(group: pd.DataFrame) -> bool:
|
|
219
218
|
return group.shape[0] > 1
|
|
220
219
|
|
|
221
|
-
grouped = df.groupby(date_col)
|
|
222
|
-
dates_with_multiple_rows =
|
|
220
|
+
grouped = df.groupby(date_col)[[c for c in df.columns if c != date_col]]
|
|
221
|
+
dates_with_multiple_rows = grouped.apply(is_multiple_rows).sum()
|
|
223
222
|
|
|
224
223
|
# share of dates with more than one record is more than 99%
|
|
225
224
|
if dates_with_multiple_rows / nunique_dates < 0.99:
|
|
@@ -228,8 +227,8 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
|
|
|
228
227
|
if df.shape[1] <= 3:
|
|
229
228
|
return True
|
|
230
229
|
|
|
231
|
-
|
|
232
|
-
return
|
|
230
|
+
grouped.apply(check_differences)
|
|
231
|
+
return len(accumulated_changing_columns) <= 2
|
|
233
232
|
|
|
234
233
|
|
|
235
234
|
def validate_dates_distribution(
|
|
@@ -249,8 +248,11 @@ def validate_dates_distribution(
|
|
|
249
248
|
if col in search_keys:
|
|
250
249
|
continue
|
|
251
250
|
try:
|
|
252
|
-
|
|
253
|
-
|
|
251
|
+
if pd.__version__ >= "2.0.0":
|
|
252
|
+
# Format mixed to avoid massive warnings
|
|
253
|
+
pd.to_datetime(X[col], format="mixed")
|
|
254
|
+
else:
|
|
255
|
+
pd.to_datetime(X[col])
|
|
254
256
|
maybe_date_col = col
|
|
255
257
|
break
|
|
256
258
|
except Exception:
|
|
@@ -259,7 +261,10 @@ def validate_dates_distribution(
|
|
|
259
261
|
if maybe_date_col is None:
|
|
260
262
|
return
|
|
261
263
|
|
|
262
|
-
|
|
264
|
+
if pd.__version__ >= "2.0.0":
|
|
265
|
+
dates = pd.to_datetime(X[maybe_date_col], format="mixed").dt.date
|
|
266
|
+
else:
|
|
267
|
+
dates = pd.to_datetime(X[maybe_date_col]).dt.date
|
|
263
268
|
|
|
264
269
|
date_counts = dates.value_counts().sort_index()
|
|
265
270
|
|
upgini/utils/deduplicate_utils.py
CHANGED
|
@@ -3,15 +3,7 @@ from typing import Dict, List, Optional, Union
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
-
from upgini.metadata import
|
|
7
|
-
ENTITY_SYSTEM_RECORD_ID,
|
|
8
|
-
EVAL_SET_INDEX,
|
|
9
|
-
SORT_ID,
|
|
10
|
-
SYSTEM_RECORD_ID,
|
|
11
|
-
TARGET,
|
|
12
|
-
ModelTaskType,
|
|
13
|
-
SearchKey,
|
|
14
|
-
)
|
|
6
|
+
from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
|
|
15
7
|
from upgini.resource_bundle import ResourceBundle
|
|
16
8
|
from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
|
|
17
9
|
from upgini.utils.target_utils import define_task
|
|
@@ -151,8 +143,6 @@ def clean_full_duplicates(
|
|
|
151
143
|
unique_columns = df.columns.tolist()
|
|
152
144
|
if SYSTEM_RECORD_ID in unique_columns:
|
|
153
145
|
unique_columns.remove(SYSTEM_RECORD_ID)
|
|
154
|
-
if ENTITY_SYSTEM_RECORD_ID in unique_columns:
|
|
155
|
-
unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
|
|
156
146
|
if SORT_ID in unique_columns:
|
|
157
147
|
unique_columns.remove(SORT_ID)
|
|
158
148
|
if EVAL_SET_INDEX in unique_columns:
|
upgini/utils/email_utils.py
CHANGED
|
@@ -4,10 +4,10 @@ from hashlib import sha256
|
|
|
4
4
|
from typing import Dict, List, Optional
|
|
5
5
|
|
|
6
6
|
import pandas as pd
|
|
7
|
-
from pandas.api.types import
|
|
8
|
-
from upgini.resource_bundle import bundle
|
|
7
|
+
from pandas.api.types import is_object_dtype, is_string_dtype
|
|
9
8
|
|
|
10
9
|
from upgini.metadata import SearchKey
|
|
10
|
+
from upgini.resource_bundle import bundle
|
|
11
11
|
from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
|
|
12
12
|
|
|
13
13
|
EMAIL_REGEX = re.compile(r"^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$")
|
|
@@ -38,13 +38,11 @@ class EmailSearchKeyConverter:
|
|
|
38
38
|
email_column: str,
|
|
39
39
|
hem_column: Optional[str],
|
|
40
40
|
search_keys: Dict[str, SearchKey],
|
|
41
|
-
unnest_search_keys: Optional[List[str]] = None,
|
|
42
41
|
logger: Optional[logging.Logger] = None,
|
|
43
42
|
):
|
|
44
43
|
self.email_column = email_column
|
|
45
44
|
self.hem_column = hem_column
|
|
46
45
|
self.search_keys = search_keys
|
|
47
|
-
self.unnest_search_keys = unnest_search_keys
|
|
48
46
|
if logger is not None:
|
|
49
47
|
self.logger = logger
|
|
50
48
|
else:
|
|
@@ -82,12 +80,9 @@ class EmailSearchKeyConverter:
|
|
|
82
80
|
del self.search_keys[self.email_column]
|
|
83
81
|
return df
|
|
84
82
|
self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
|
|
85
|
-
self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
|
|
86
83
|
self.email_converted_to_hem = True
|
|
87
84
|
|
|
88
85
|
del self.search_keys[self.email_column]
|
|
89
|
-
if self.email_column in self.unnest_search_keys:
|
|
90
|
-
self.unnest_search_keys.remove(self.email_column)
|
|
91
86
|
|
|
92
87
|
df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
|
|
93
88
|
|
upgini/utils/fallback_progress_bar.py
CHANGED
|
@@ -22,7 +22,7 @@ class CustomFallbackProgressBar:
|
|
|
22
22
|
fraction = self.progress / self.total
|
|
23
23
|
filled = "=" * int(fraction * self.text_width)
|
|
24
24
|
rest = " " * (self.text_width - len(filled))
|
|
25
|
-
return "[{}{}] {}% {
|
|
25
|
+
return f"[{filled}{rest}] {self.progress}% {self._stage} {self._eta}"
|
|
26
26
|
|
|
27
27
|
def display(self):
|
|
28
28
|
print(self)
|
upgini/utils/progress_bar.py
CHANGED
|
@@ -28,7 +28,7 @@ class CustomProgressBar(DisplayObject):
|
|
|
28
28
|
fraction = self.progress / self.total
|
|
29
29
|
filled = "=" * int(fraction * self.text_width)
|
|
30
30
|
rest = " " * (self.text_width - len(filled))
|
|
31
|
-
return "[{}{}] {}% {
|
|
31
|
+
return f"[{filled}{rest}] {self.progress}% {self._stage}"
|
|
32
32
|
|
|
33
33
|
def _repr_html_(self):
|
|
34
34
|
return "<progress style='width:{}' max='{}' value='{}'></progress> {}% {}</br>{}".format(
|
upgini/utils/sklearn_ext.py
CHANGED
|
@@ -20,6 +20,7 @@ from sklearn.metrics._scorer import _MultimetricScorer
|
|
|
20
20
|
from sklearn.model_selection import check_cv
|
|
21
21
|
from sklearn.utils.fixes import np_version, parse_version
|
|
22
22
|
from sklearn.utils.validation import indexable
|
|
23
|
+
|
|
23
24
|
# from sklearn.model_selection import cross_validate as original_cross_validate
|
|
24
25
|
|
|
25
26
|
_DEFAULT_TAGS = {
|
|
@@ -46,7 +47,7 @@ _DEFAULT_TAGS = {
|
|
|
46
47
|
|
|
47
48
|
def cross_validate(
|
|
48
49
|
estimator,
|
|
49
|
-
|
|
50
|
+
x,
|
|
50
51
|
y=None,
|
|
51
52
|
*,
|
|
52
53
|
groups=None,
|
|
@@ -69,7 +70,7 @@ def cross_validate(
|
|
|
69
70
|
estimator : estimator object implementing 'fit'
|
|
70
71
|
The object to use to fit the data.
|
|
71
72
|
|
|
72
|
-
|
|
73
|
+
x : array-like of shape (n_samples, n_features)
|
|
73
74
|
The data to fit. Can be for example a list, or an array.
|
|
74
75
|
|
|
75
76
|
y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
|
|
@@ -250,7 +251,7 @@ def cross_validate(
|
|
|
250
251
|
|
|
251
252
|
"""
|
|
252
253
|
try:
|
|
253
|
-
|
|
254
|
+
x, y, groups = indexable(x, y, groups)
|
|
254
255
|
|
|
255
256
|
cv = check_cv(cv, y, classifier=is_classifier(estimator))
|
|
256
257
|
|
|
@@ -267,7 +268,7 @@ def cross_validate(
|
|
|
267
268
|
results = parallel(
|
|
268
269
|
delayed(_fit_and_score)(
|
|
269
270
|
clone(estimator),
|
|
270
|
-
|
|
271
|
+
x,
|
|
271
272
|
y,
|
|
272
273
|
scorers,
|
|
273
274
|
train,
|
|
@@ -280,7 +281,7 @@ def cross_validate(
|
|
|
280
281
|
return_estimator=return_estimator,
|
|
281
282
|
error_score=error_score,
|
|
282
283
|
)
|
|
283
|
-
for train, test in cv.split(
|
|
284
|
+
for train, test in cv.split(x, y, groups)
|
|
284
285
|
)
|
|
285
286
|
|
|
286
287
|
_warn_about_fit_failures(results, error_score)
|
|
@@ -487,7 +488,7 @@ def _fit_and_score(
|
|
|
487
488
|
if y_train is None:
|
|
488
489
|
estimator.fit(X_train, **fit_params)
|
|
489
490
|
else:
|
|
490
|
-
if isinstance(estimator, CatBoostClassifier
|
|
491
|
+
if isinstance(estimator, (CatBoostClassifier, CatBoostRegressor)):
|
|
491
492
|
fit_params = fit_params.copy()
|
|
492
493
|
fit_params["eval_set"] = [(X_test, y_test)]
|
|
493
494
|
estimator.fit(X_train, y_train, **fit_params)
|
|
@@ -582,9 +583,11 @@ def _aggregate_score_dicts(scores):
|
|
|
582
583
|
"""
|
|
583
584
|
|
|
584
585
|
return {
|
|
585
|
-
key:
|
|
586
|
-
|
|
587
|
-
|
|
586
|
+
key: (
|
|
587
|
+
np.asarray([score[key] for score in scores])
|
|
588
|
+
if isinstance(scores[0][key], numbers.Number)
|
|
589
|
+
else [score[key] for score in scores]
|
|
590
|
+
)
|
|
588
591
|
for key in scores[0]
|
|
589
592
|
}
|
|
590
593
|
|
|
@@ -969,9 +972,7 @@ def _safe_indexing(X, indices, *, axis=0):
|
|
|
969
972
|
return X
|
|
970
973
|
|
|
971
974
|
if axis not in (0, 1):
|
|
972
|
-
raise ValueError(
|
|
973
|
-
"'axis' should be either 0 (to index rows) or 1 (to index " " column). Got {} instead.".format(axis)
|
|
974
|
-
)
|
|
975
|
+
raise ValueError("'axis' should be either 0 (to index rows) or 1 (to index " f" column). Got {axis} instead.")
|
|
975
976
|
|
|
976
977
|
indices_dtype = _determine_key_type(indices)
|
|
977
978
|
|
|
@@ -982,7 +983,7 @@ def _safe_indexing(X, indices, *, axis=0):
|
|
|
982
983
|
raise ValueError(
|
|
983
984
|
"'X' should be a 2D NumPy array, 2D sparse matrix or pandas "
|
|
984
985
|
"dataframe when indexing the columns (i.e. 'axis=1'). "
|
|
985
|
-
"Got {} instead with {} dimension(s)."
|
|
986
|
+
f"Got {type(X)} instead with {X.ndim} dimension(s)."
|
|
986
987
|
)
|
|
987
988
|
|
|
988
989
|
if axis == 1 and indices_dtype == "str" and not hasattr(X, "loc"):
|
upgini/utils/track_info.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from functools import lru_cache
|
|
2
1
|
import os
|
|
3
2
|
import re
|
|
4
3
|
import socket
|
|
5
4
|
import sys
|
|
5
|
+
from functools import lru_cache
|
|
6
6
|
from getpass import getuser
|
|
7
7
|
from hashlib import sha256
|
|
8
8
|
from typing import Optional
|
|
@@ -51,7 +51,7 @@ def _get_execution_ide() -> str:
|
|
|
51
51
|
return "other"
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
@lru_cache
|
|
54
|
+
@lru_cache
|
|
55
55
|
def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optional[str] = None) -> dict:
|
|
56
56
|
# default values
|
|
57
57
|
track = {"ide": _get_execution_ide()}
|
upgini/version_validator.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import threading
|
|
2
3
|
|
|
3
4
|
import requests
|
|
4
|
-
import threading
|
|
5
5
|
|
|
6
6
|
try:
|
|
7
7
|
from packaging.version import parse
|
|
@@ -10,7 +10,7 @@ except ImportError:
|
|
|
10
10
|
|
|
11
11
|
import logging
|
|
12
12
|
|
|
13
|
-
from upgini.
|
|
13
|
+
from upgini.__about__ import __version__
|
|
14
14
|
from upgini.resource_bundle import bundle
|
|
15
15
|
|
|
16
16
|
URL_PATTERN = "https://pypi.python.org/pypi/{package}/json"
|
|
@@ -1,14 +1,13 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.279
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
|
-
Home-page: https://upgini.com/
|
|
6
|
-
Author: Upgini Developers
|
|
7
|
-
Author-email: madewithlove@upgini.com
|
|
8
|
-
License: BSD 3-Clause License
|
|
9
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
|
+
Project-URL: Homepage, https://upgini.com/
|
|
10
7
|
Project-URL: Source, https://github.com/upgini/upgini
|
|
11
|
-
|
|
8
|
+
Author-email: Upgini Developers <madewithlove@upgini.com>
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: automl,data mining,data science,data search,machine learning
|
|
12
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
13
12
|
Classifier: Intended Audience :: Customer Service
|
|
14
13
|
Classifier: Intended Audience :: Developers
|
|
@@ -23,22 +22,21 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
23
22
|
Classifier: Programming Language :: Python :: 3.10
|
|
24
23
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
24
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
26
|
-
Requires-Python:
|
|
25
|
+
Requires-Python: <3.11,>=3.8
|
|
26
|
+
Requires-Dist: catboost>=1.0.3
|
|
27
|
+
Requires-Dist: fastparquet>=0.8.1
|
|
28
|
+
Requires-Dist: ipywidgets>=8.1.0
|
|
29
|
+
Requires-Dist: lightgbm>=3.3.2
|
|
30
|
+
Requires-Dist: numpy>=1.19.0
|
|
31
|
+
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
32
|
+
Requires-Dist: pydantic<2.0.0,>=1.8.2
|
|
33
|
+
Requires-Dist: pyjwt>=2.8.0
|
|
34
|
+
Requires-Dist: python-dateutil>=2.8.0
|
|
35
|
+
Requires-Dist: python-json-logger>=2.0.2
|
|
36
|
+
Requires-Dist: requests>=2.8.0
|
|
37
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
38
|
+
Requires-Dist: xhtml2pdf==0.2.11
|
|
27
39
|
Description-Content-Type: text/markdown
|
|
28
|
-
License-File: LICENSE
|
|
29
|
-
Requires-Dist: python-dateutil >=2.8.0
|
|
30
|
-
Requires-Dist: requests >=2.8.0
|
|
31
|
-
Requires-Dist: pandas <3.0.0,>=1.1.0
|
|
32
|
-
Requires-Dist: numpy >=1.19.0
|
|
33
|
-
Requires-Dist: scikit-learn >=1.3.0
|
|
34
|
-
Requires-Dist: pydantic <2.0.0,>=1.8.2
|
|
35
|
-
Requires-Dist: fastparquet >=0.8.1
|
|
36
|
-
Requires-Dist: python-json-logger >=2.0.2
|
|
37
|
-
Requires-Dist: catboost >=1.0.3
|
|
38
|
-
Requires-Dist: lightgbm >=3.3.2
|
|
39
|
-
Requires-Dist: pyjwt >=2.8.0
|
|
40
|
-
Requires-Dist: xhtml2pdf ==0.2.11
|
|
41
|
-
Requires-Dist: ipywidgets >=8.1.0
|
|
42
40
|
|
|
43
41
|
|
|
44
42
|
<!-- <h2 align="center"> <a href="https://upgini.com/">Upgini</a> : low-code feature search and enrichment library for machine learning </h2> -->
|
|
@@ -841,4 +839,4 @@ Some convenient ways to start contributing are:
|
|
|
841
839
|
- [More perks for registered users](https://profile.upgini.com)
|
|
842
840
|
|
|
843
841
|
<sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
|
|
844
|
-
Please report it here.</a></sup>
|
|
842
|
+
Please report it here.</a></sup>
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
upgini/__about__.py,sha256=iqiE7m95oLHgp-tKIBMVrOSu-y0RBxTaA5Ngsqhp5yk,24
|
|
2
|
+
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
3
|
+
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
+
upgini/dataset.py,sha256=uiFY-P8te7-zigib1hGWRtW5v0X7chxPM0hJFdixAN8,45623
|
|
5
|
+
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
+
upgini/features_enricher.py,sha256=NU2lpp6ZrJ3oKOOLa6u7DQ5kb64n8mDAFXadWjr219A,176290
|
|
7
|
+
upgini/http.py,sha256=khrYSldpY-HbVLCcApfV1BjBFK6Uyuatb4colKybxgY,42301
|
|
8
|
+
upgini/metadata.py,sha256=CFJekYGD7Ep7pRFH7wCEcsXS4bz83do33FNmtcCY9P4,9729
|
|
9
|
+
upgini/metrics.py,sha256=L4LKSMOK9iKFLaJvTBTKk2tQauMgiJqtfrBclM3fBjs,29670
|
|
10
|
+
upgini/search_task.py,sha256=LtRJ9bCPjMo1gJ-sUDKERhDwGcWKImrzwVFHjkMSQHQ,17071
|
|
11
|
+
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
12
|
+
upgini/version_validator.py,sha256=RGg87VweujTNlibgsOuqPLIEiBgIOkuXNVTGuNCD234,1405
|
|
13
|
+
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
14
|
+
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
15
|
+
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
upgini/autofe/all_operands.py,sha256=SyKVU-xGMHgoRZvHrCmba2u2Ygc73c1mXFolNSWe8Uo,2357
|
|
17
|
+
upgini/autofe/binary.py,sha256=441BRuqMsxlxuw4c8rMZB6h5EpRdVMk-bVa03U7T5Hg,3973
|
|
18
|
+
upgini/autofe/date.py,sha256=Vy1I92fLLYLhuYKJmtuPBMI8cPxE4Uwk40hqE2F2e1A,4224
|
|
19
|
+
upgini/autofe/feature.py,sha256=ChSuuIbRPGIWnPjKAgZbeAEi7Y_PjSVRyxxx41MyFp0,11845
|
|
20
|
+
upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
|
|
21
|
+
upgini/autofe/operand.py,sha256=xgEIZuFCfckc6LpBqVu1OVK3JEabm1O-LHUsp83EHKA,2806
|
|
22
|
+
upgini/autofe/unary.py,sha256=v-l3aiE5hj6kurvh6adCQL8W3X9u9a7RVbS_WPR2qlw,3146
|
|
23
|
+
upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
|
|
24
|
+
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
+
upgini/data_source/data_source_publisher.py,sha256=taRzyGgrPrTTSGw4Y-Ca5k4bf30aiTa68rxqT9zfqeI,16478
|
|
26
|
+
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
27
|
+
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
28
|
+
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
+
upgini/normalizer/phone_normalizer.py,sha256=EzTaahk6myRv6ZXgbyVFGY4kpo_2VlQgOrm5_lfbmNI,9996
|
|
30
|
+
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
31
|
+
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
32
|
+
upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
|
|
33
|
+
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
34
|
+
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
+
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
36
|
+
upgini/sampler/random_under_sampler.py,sha256=TIbm7ATo-bCMF-IiS5sZeDC1ad1SYg0eY_rRmg84yIQ,4024
|
|
37
|
+
upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
|
|
38
|
+
upgini/utils/__init__.py,sha256=O_KgzKiJjW3g4NoqZ7lAxUpoHcBi_gze6r3ndEjCH74,842
|
|
39
|
+
upgini/utils/base_search_key_detector.py,sha256=UNs2uxEcD1N_mOtkx3k6U70DCajW-QEO2vZp41GF0mU,855
|
|
40
|
+
upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl1UOB4s,3382
|
|
41
|
+
upgini/utils/country_utils.py,sha256=yE8oRgMpXuJxPfQm4fioY6dg6700HgVnHSk4Cv9sUyM,6511
|
|
42
|
+
upgini/utils/custom_loss_utils.py,sha256=bLk3uygqkJBaGkyzfO032d72QASae-dDyEURfFehVJo,3973
|
|
43
|
+
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
44
|
+
upgini/utils/datetime_utils.py,sha256=-LsDTThsGKsTZ57V1uNiHtLcoTtqktk5tui4WnqggJo,10673
|
|
45
|
+
upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
|
|
46
|
+
upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
|
|
47
|
+
upgini/utils/email_utils.py,sha256=PLufTO97Pg9PPsNqB9agcM6M98MIxKUgIgNn2mVwSQ0,3520
|
|
48
|
+
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
49
|
+
upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
|
|
50
|
+
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
51
|
+
upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
|
|
52
|
+
upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
|
|
53
|
+
upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
|
|
54
|
+
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
55
|
+
upgini/utils/sklearn_ext.py,sha256=c23MGSUVfxLnaDWKAxavHgnOtm5dGKkF3YswdWQcFzs,43984
|
|
56
|
+
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
57
|
+
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
58
|
+
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
59
|
+
upgini-1.1.279.dist-info/METADATA,sha256=A4C2M3EmtRTSAaJXreEzPFZtkp2t-Ixy2NjRqa9uv30,48118
|
|
60
|
+
upgini-1.1.279.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
61
|
+
upgini-1.1.279.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
62
|
+
upgini-1.1.279.dist-info/RECORD,,
|