upgini 1.1.296a3521.dev8__py3-none-any.whl → 1.1.296a3521.dev10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/all_operands.py +2 -2
- upgini/autofe/date.py +4 -2
- upgini/autofe/feature.py +1 -5
- upgini/data_source/data_source_publisher.py +37 -0
- upgini/features_enricher.py +1 -1
- {upgini-1.1.296a3521.dev8.dist-info → upgini-1.1.296a3521.dev10.dist-info}/METADATA +1 -1
- {upgini-1.1.296a3521.dev8.dist-info → upgini-1.1.296a3521.dev10.dist-info}/RECORD +10 -10
- {upgini-1.1.296a3521.dev8.dist-info → upgini-1.1.296a3521.dev10.dist-info}/WHEEL +0 -0
- {upgini-1.1.296a3521.dev8.dist-info → upgini-1.1.296a3521.dev10.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.296a3521.dev8"
|
|
1
|
+
__version__ = "1.1.296a3521.dev10"
|
upgini/autofe/all_operands.py
CHANGED
|
@@ -7,7 +7,7 @@ from upgini.autofe.date import (
|
|
|
7
7
|
DateListDiff,
|
|
8
8
|
DateListDiffBounded,
|
|
9
9
|
DatePercentile,
|
|
10
|
-
|
|
10
|
+
DatePercentileMethod2,
|
|
11
11
|
)
|
|
12
12
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
13
13
|
from upgini.autofe.operand import Operand
|
|
@@ -57,7 +57,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
57
57
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
|
|
58
58
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
|
|
59
59
|
DatePercentile(),
|
|
60
|
-
|
|
60
|
+
DatePercentileMethod2(),
|
|
61
61
|
Norm(),
|
|
62
62
|
]
|
|
63
63
|
}
|
upgini/autofe/date.py
CHANGED
|
@@ -39,6 +39,7 @@ class DateDiffMixin(BaseModel):
|
|
|
39
39
|
|
|
40
40
|
class DateDiff(PandasOperand, DateDiffMixin):
|
|
41
41
|
name = "date_diff"
|
|
42
|
+
alias = "date_diff_type1"
|
|
42
43
|
is_binary = True
|
|
43
44
|
has_symmetry_importance = True
|
|
44
45
|
|
|
@@ -197,6 +198,7 @@ class DatePercentileBase(PandasOperand, abc.ABC):
|
|
|
197
198
|
|
|
198
199
|
class DatePercentile(DatePercentileBase):
|
|
199
200
|
name = "date_per"
|
|
201
|
+
alias = "date_per_method1"
|
|
200
202
|
|
|
201
203
|
zero_month: Optional[int]
|
|
202
204
|
zero_year: Optional[int]
|
|
@@ -232,8 +234,8 @@ class DatePercentile(DatePercentileBase):
|
|
|
232
234
|
)
|
|
233
235
|
|
|
234
236
|
|
|
235
|
-
class
|
|
236
|
-
name = "
|
|
237
|
+
class DatePercentileMethod2(DatePercentileBase):
|
|
238
|
+
name = "date_per_method2"
|
|
237
239
|
|
|
238
240
|
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
239
241
|
pass
|
upgini/autofe/feature.py
CHANGED
|
@@ -134,9 +134,6 @@ class Feature:
|
|
|
134
134
|
for child in self.children:
|
|
135
135
|
child.delete_data()
|
|
136
136
|
|
|
137
|
-
def get_name_component(self, **kwargs) -> str:
|
|
138
|
-
return "_".join(ch.get_name_component(**kwargs) for ch in self.children) + "_" + self.get_op_display_name()
|
|
139
|
-
|
|
140
137
|
def get_op_display_name(self) -> str:
|
|
141
138
|
return self.op.alias or self.op.name.lower()
|
|
142
139
|
|
|
@@ -149,8 +146,7 @@ class Feature:
|
|
|
149
146
|
elif shorten and not self.op.is_unary:
|
|
150
147
|
components = ["f_autofe", self.get_op_display_name()]
|
|
151
148
|
else:
|
|
152
|
-
|
|
153
|
-
components = ["f_" + "_f_".join(child_components)] + [
|
|
149
|
+
components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
|
|
154
150
|
"autofe",
|
|
155
151
|
self.get_op_display_name(),
|
|
156
152
|
]
|
|
@@ -59,9 +59,35 @@ class DataSourcePublisher:
|
|
|
59
59
|
features_for_embeddings: Optional[List[str]] = DEFAULT_GENERATE_EMBEDDINGS,
|
|
60
60
|
data_table_id_to_replace: Optional[str] = None,
|
|
61
61
|
keep_features: Optional[List[str]] = None,
|
|
62
|
+
date_features: Optional[List[str]] = None,
|
|
63
|
+
date_vector_features: Optional[List[str]] = None,
|
|
62
64
|
_force_generation=False,
|
|
63
65
|
_silent=False,
|
|
64
66
|
) -> str:
|
|
67
|
+
"""Register new ADS
|
|
68
|
+
|
|
69
|
+
Parameters
|
|
70
|
+
----------
|
|
71
|
+
data_table_uri - str - table name in format {project_id}.{datasource_name}.{table_name}
|
|
72
|
+
|
|
73
|
+
search_keys - dict with column names as keys and SearchKey as value
|
|
74
|
+
|
|
75
|
+
update_frequency - str - (Monthly, Weekly, Daily, Annually, Quarterly)
|
|
76
|
+
|
|
77
|
+
exclude_from_autofe_generation - optional list of features that should be excluded from AutoFE
|
|
78
|
+
|
|
79
|
+
secondary_search_keys - optional dict of secondary search keys
|
|
80
|
+
|
|
81
|
+
sort_column - optional str - name of unique column that could be used for sort
|
|
82
|
+
|
|
83
|
+
date_format - optional str - format of date if it is present in search keys
|
|
84
|
+
|
|
85
|
+
...
|
|
86
|
+
|
|
87
|
+
data_table_id_to_replace - optional str - id of registered ADS that should be replaced by new table
|
|
88
|
+
|
|
89
|
+
keep_features - optional list - features that should not be removed from ADS (even if they are personal)
|
|
90
|
+
"""
|
|
65
91
|
trace_id = str(uuid.uuid4())
|
|
66
92
|
|
|
67
93
|
with MDC(trace_id=trace_id):
|
|
@@ -124,6 +150,14 @@ class DataSourcePublisher:
|
|
|
124
150
|
request["excludeFromGeneration"] = exclude_from_autofe_generation
|
|
125
151
|
if keep_features is not None:
|
|
126
152
|
request["keepFeatures"] = keep_features
|
|
153
|
+
if date_features is not None:
|
|
154
|
+
if date_format is None:
|
|
155
|
+
raise ValidationError("date_format should be presented if you use date features")
|
|
156
|
+
request["dateFeatures"] = date_features
|
|
157
|
+
if date_vector_features is not None:
|
|
158
|
+
if date_format is None:
|
|
159
|
+
raise ValidationError("date_format should be presented if you use date vector features")
|
|
160
|
+
request["dateVectorFeatures"] = date_vector_features
|
|
127
161
|
self.logger.info(f"Start registering data table {request}")
|
|
128
162
|
|
|
129
163
|
task_id = self._rest_client.register_ads(request, trace_id)
|
|
@@ -181,6 +215,9 @@ class DataSourcePublisher:
|
|
|
181
215
|
msg = f"Data table successfully registered with id: {data_table_id}"
|
|
182
216
|
self.logger.info(msg)
|
|
183
217
|
print(msg)
|
|
218
|
+
if "warnings" in status_response and status_response["warnings"]:
|
|
219
|
+
self.logger.warning(status_response["warnings"])
|
|
220
|
+
print(status_response["warnings"])
|
|
184
221
|
return data_table_id
|
|
185
222
|
except KeyboardInterrupt:
|
|
186
223
|
if task_id is not None:
|
upgini/features_enricher.py
CHANGED
|
@@ -2870,7 +2870,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2870
2870
|
self.logger.info(f"Dates interval is ({min_date}, {max_date})")
|
|
2871
2871
|
|
|
2872
2872
|
except Exception:
|
|
2873
|
-
self.logger.
|
|
2873
|
+
self.logger.warning("Failed to log debug information", exc_info=True)
|
|
2874
2874
|
|
|
2875
2875
|
def __handle_index_search_keys(self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> pd.DataFrame:
|
|
2876
2876
|
index_names = df.index.names if df.index.names != [None] else [DEFAULT_INDEX]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.296a3521.dev8
|
|
3
|
+
Version: 1.1.296a3521.dev10
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=JKvS85WNogY-2YIe8YIQ4nIWCAaaPwBgVr2LGrQzI7g,35
|
|
2
2
|
upgini/__init__.py,sha256=ObEtjFkIssl83qeKNMLpIQygfwK8TzztwiI43YTsAP0,353
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=7TLVVhGtjgx_9yaiaIUK3kZSe_R9wg5dY0d4F5qCGM4,45636
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=HQFLw3VyEsZfAt4xFnIYOnp3fzQSHAsyHzIm0gTJpOI,177543
|
|
7
7
|
upgini/http.py,sha256=bp6jWl422Icy3AhHMdCcJv5NjExE45gSMmzMTPJjPuk,42600
|
|
8
8
|
upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
|
|
9
9
|
upgini/metadata.py,sha256=qDAIO7NLSSQp_XiXCv3U4XJTLO0KH3YuQ8lvCLYPqzs,9781
|
|
@@ -14,16 +14,16 @@ upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1
|
|
|
14
14
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
15
15
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
16
16
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
upgini/autofe/all_operands.py,sha256=
|
|
17
|
+
upgini/autofe/all_operands.py,sha256=XbvgX2IU4aee9rJZ--d5MdmrfKhON_emle5-RU1qlEY,2506
|
|
18
18
|
upgini/autofe/binary.py,sha256=8FXPJxN7fnC5wphO0Dp1tQCa0lFMSDGQGvBMkSIVAcE,4155
|
|
19
|
-
upgini/autofe/date.py,sha256=
|
|
20
|
-
upgini/autofe/feature.py,sha256=
|
|
19
|
+
upgini/autofe/date.py,sha256=8zYVhjl7jVS4xt-IjCgk9px2LHnACX2YlMlmDELlRTc,7943
|
|
20
|
+
upgini/autofe/feature.py,sha256=nV1oJCT65nsAYFfXYCXYNYXZPaZVpqKJ09iCMTldooc,13500
|
|
21
21
|
upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
|
|
22
22
|
upgini/autofe/operand.py,sha256=MKEsl3zxpWzRDpTkE0sNJxTu62U20sWOvEKhPjUWS6s,2915
|
|
23
23
|
upgini/autofe/unary.py,sha256=ZWjLd-CUkNt_PpM8YuWLLipW1v_RdBlsl4JxXIVo9aM,3652
|
|
24
24
|
upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
|
|
25
25
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
-
upgini/data_source/data_source_publisher.py,sha256=
|
|
26
|
+
upgini/data_source/data_source_publisher.py,sha256=1cQZrK630VztwGGDp41ec9gqIeUtkefaqSSQEitVWiM,19581
|
|
27
27
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
28
28
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
29
29
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.1.296a3521.
|
|
61
|
-
upgini-1.1.296a3521.
|
|
62
|
-
upgini-1.1.296a3521.
|
|
63
|
-
upgini-1.1.296a3521.
|
|
60
|
+
upgini-1.1.296a3521.dev10.dist-info/METADATA,sha256=RZTVD4L0dwwznJDxfpxjmCTZGzSQjE8lX1fw5CjP9ZA,48162
|
|
61
|
+
upgini-1.1.296a3521.dev10.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
62
|
+
upgini-1.1.296a3521.dev10.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.1.296a3521.dev10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|