upgini 1.2.99a3922.dev5__tar.gz → 1.2.103__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/PKG-INFO +1 -1
- upgini-1.2.103/src/upgini/__about__.py +1 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/feature.py +1 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/operator.py +3 -0
- upgini-1.2.103/src/upgini/autofe/vector.py +75 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/data_source/data_source_publisher.py +4 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/features_enricher.py +8 -1
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/strings.properties +1 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/display_utils.py +0 -4
- upgini-1.2.99a3922.dev5/src/upgini/__about__.py +0 -1
- upgini-1.2.99a3922.dev5/src/upgini/autofe/vector.py +0 -45
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/.gitignore +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/LICENSE +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/README.md +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/pyproject.toml +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/ads.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/dataset.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/errors.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/http.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/metadata.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/metrics.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/search_task.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/spinner.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/version_validator.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "1.2.103"
|
@@ -0,0 +1,75 @@
|
|
1
|
+
from typing import List, Optional
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
|
5
|
+
from upgini.autofe.operator import OperatorRegistry, PandasOperator, ParametrizedOperator, VectorizableMixin
|
6
|
+
|
7
|
+
|
8
|
+
class Mean(PandasOperator, VectorizableMixin):
|
9
|
+
name: str = "mean"
|
10
|
+
output_type: Optional[str] = "float"
|
11
|
+
is_vector: bool = True
|
12
|
+
group_index: int = 0
|
13
|
+
|
14
|
+
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
15
|
+
return pd.DataFrame(data).T.fillna(0).mean(axis=1)
|
16
|
+
|
17
|
+
|
18
|
+
class Sum(PandasOperator, VectorizableMixin):
|
19
|
+
name: str = "sum"
|
20
|
+
is_vector: bool = True
|
21
|
+
group_index: int = 0
|
22
|
+
|
23
|
+
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
24
|
+
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
25
|
+
|
26
|
+
|
27
|
+
class Vectorize(PandasOperator, VectorizableMixin):
|
28
|
+
name: str = "vectorize"
|
29
|
+
is_vector: bool = True
|
30
|
+
group_index: int = 0
|
31
|
+
|
32
|
+
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
33
|
+
return pd.DataFrame(data).T.apply(lambda x: x.to_list(), axis=1)
|
34
|
+
|
35
|
+
|
36
|
+
class OnnxModel(PandasOperator, ParametrizedOperator, metaclass=OperatorRegistry):
|
37
|
+
name: str = "onnx"
|
38
|
+
score_name: str = "score"
|
39
|
+
is_vector: bool = True
|
40
|
+
output_type: Optional[str] = "float"
|
41
|
+
|
42
|
+
def to_formula(self) -> str:
|
43
|
+
return f"onnx_{self.score_name}"
|
44
|
+
|
45
|
+
@classmethod
|
46
|
+
def from_formula(cls, formula: str) -> Optional["OnnxModel"]:
|
47
|
+
if "(" in formula:
|
48
|
+
return None
|
49
|
+
if formula.startswith("onnx_"):
|
50
|
+
score_name = formula[len("onnx_"):]
|
51
|
+
return cls(score_name=score_name)
|
52
|
+
elif formula == "onnx": # for OperatorRegistry
|
53
|
+
return cls()
|
54
|
+
return None
|
55
|
+
|
56
|
+
|
57
|
+
class CatboostModel(PandasOperator, ParametrizedOperator, metaclass=OperatorRegistry):
|
58
|
+
name: str = "catboost"
|
59
|
+
score_name: str = "score"
|
60
|
+
is_vector: bool = True
|
61
|
+
output_type: Optional[str] = "float"
|
62
|
+
|
63
|
+
def to_formula(self) -> str:
|
64
|
+
return f"catboost_{self.score_name}"
|
65
|
+
|
66
|
+
@classmethod
|
67
|
+
def from_formula(cls, formula: str) -> Optional["CatboostModel"]:
|
68
|
+
if "(" in formula:
|
69
|
+
return None
|
70
|
+
if formula.startswith("catboost_"):
|
71
|
+
score_name = formula[len("catboost_"):]
|
72
|
+
return cls(score_name=score_name)
|
73
|
+
elif formula == "catboost": # for OperatorRegistry
|
74
|
+
return cls()
|
75
|
+
return None
|
@@ -500,6 +500,8 @@ class DataSourcePublisher:
|
|
500
500
|
name: str,
|
501
501
|
input_names: List[str],
|
502
502
|
search_id: str,
|
503
|
+
date_column: Optional[str] = None,
|
504
|
+
score_name: Optional[str] = None,
|
503
505
|
model_type: Optional[Literal["ONNX", "CATBOOST"]] = None,
|
504
506
|
description: str = "",
|
505
507
|
):
|
@@ -508,6 +510,8 @@ class DataSourcePublisher:
|
|
508
510
|
metadata = {
|
509
511
|
"modelName": name,
|
510
512
|
"inputNames": input_names,
|
513
|
+
"dateColumn": date_column,
|
514
|
+
"scoreName": score_name,
|
511
515
|
"searchTaskId": search_id,
|
512
516
|
"modelType": model_type or "ONNX",
|
513
517
|
"description": description,
|
@@ -3708,6 +3708,11 @@ if response.status_code == 200:
|
|
3708
3708
|
|
3709
3709
|
@staticmethod
|
3710
3710
|
def _get_group_columns(df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> List[str]:
|
3711
|
+
search_key_priority = [SearchKey.PHONE, SearchKey.EMAIL, SearchKey.HEM, SearchKey.IP]
|
3712
|
+
for key_type in search_key_priority:
|
3713
|
+
if key_type in search_keys.values():
|
3714
|
+
return [col for col, t in search_keys.items() if t == key_type]
|
3715
|
+
|
3711
3716
|
return [
|
3712
3717
|
col
|
3713
3718
|
for col, t in search_keys.items()
|
@@ -4481,7 +4486,9 @@ if response.status_code == 200:
|
|
4481
4486
|
sample = df.head(100)
|
4482
4487
|
|
4483
4488
|
def check_need_detect(search_key: SearchKey):
|
4484
|
-
return not is_transform or
|
4489
|
+
return not is_transform or (
|
4490
|
+
search_key in self.fit_search_keys.values() and search_key not in search_keys.values()
|
4491
|
+
)
|
4485
4492
|
|
4486
4493
|
# if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
|
4487
4494
|
if check_need_detect(SearchKey.POSTAL_CODE):
|
@@ -99,6 +99,7 @@ unsupported_date_type=Unsupported type of date column `{}`. Convert to datetime
|
|
99
99
|
invalid_postal_code=All values of POSTAL_CODE column `{}` are invalid
|
100
100
|
invalid_country=All values of COUNTRY column `{}` are invalid
|
101
101
|
invalid_ip=All values of IP column `{}` are invalid
|
102
|
+
transform_search_keys_not_match_fit_keys=Search keys used in fit call {} are different from the search keys used in transform call. Please use the same search keys as in fit call.
|
102
103
|
# X and y validation
|
103
104
|
unsupported_x_type=Unsupported type of X: {}. Use pandas.DataFrame, pandas.Series or numpy.ndarray or list
|
104
105
|
x_contains_dup_columns=X contains duplicate column names. Please rename or drop duplicates
|
@@ -152,10 +152,6 @@ def make_html_report(
|
|
152
152
|
email: Optional[str] = None,
|
153
153
|
search_keys: Optional[List[str]] = None,
|
154
154
|
) -> str:
|
155
|
-
# relevant_features_df = relevant_features_df.copy()
|
156
|
-
# relevant_features_df["Feature name"] = relevant_features_df["Feature name"].apply(
|
157
|
-
# lambda x: "*" + x if x.contains("_autofe_") else x
|
158
|
-
# )
|
159
155
|
relevant_datasources_df = relevant_datasources_df.copy()
|
160
156
|
relevant_datasources_df["action"] = (
|
161
157
|
f"""<a href="https://upgini.com/request-a-quote?search-id={search_id}">"""
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "1.2.99a3922.dev5"
|
@@ -1,45 +0,0 @@
|
|
1
|
-
from typing import List, Optional
|
2
|
-
|
3
|
-
import pandas as pd
|
4
|
-
|
5
|
-
from upgini.autofe.operator import OperatorRegistry, PandasOperator, VectorizableMixin
|
6
|
-
|
7
|
-
|
8
|
-
class Mean(PandasOperator, VectorizableMixin):
|
9
|
-
name: str = "mean"
|
10
|
-
output_type: Optional[str] = "float"
|
11
|
-
is_vector: bool = True
|
12
|
-
group_index: int = 0
|
13
|
-
|
14
|
-
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
15
|
-
return pd.DataFrame(data).T.fillna(0).mean(axis=1)
|
16
|
-
|
17
|
-
|
18
|
-
class Sum(PandasOperator, VectorizableMixin):
|
19
|
-
name: str = "sum"
|
20
|
-
is_vector: bool = True
|
21
|
-
group_index: int = 0
|
22
|
-
|
23
|
-
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
24
|
-
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
25
|
-
|
26
|
-
|
27
|
-
class Vectorize(PandasOperator, VectorizableMixin):
|
28
|
-
name: str = "vectorize"
|
29
|
-
is_vector: bool = True
|
30
|
-
group_index: int = 0
|
31
|
-
|
32
|
-
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
33
|
-
return pd.DataFrame(data).T.apply(lambda x: x.to_list(), axis=1)
|
34
|
-
|
35
|
-
|
36
|
-
class OnnxModel(PandasOperator, metaclass=OperatorRegistry):
|
37
|
-
name: str = "onnx"
|
38
|
-
is_vector: bool = True
|
39
|
-
output_type: Optional[str] = "float"
|
40
|
-
|
41
|
-
|
42
|
-
class CatboostModel(PandasOperator, metaclass=OperatorRegistry):
|
43
|
-
name: str = "catboost"
|
44
|
-
is_vector: bool = True
|
45
|
-
output_type: Optional[str] = "float"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{upgini-1.2.99a3922.dev5 → upgini-1.2.103}/src/upgini/resource_bundle/strings_widget.properties
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|