upgini 1.1.287a3232.post1__py3-none-any.whl → 1.1.288a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/date.py +7 -18
- upgini/data_source/data_source_publisher.py +3 -0
- upgini/features_enricher.py +3 -3
- upgini/resource_bundle/strings.properties +1 -1
- upgini/utils/sklearn_ext.py +1 -1
- {upgini-1.1.287a3232.post1.dist-info → upgini-1.1.288a0.dist-info}/METADATA +1 -1
- {upgini-1.1.287a3232.post1.dist-info → upgini-1.1.288a0.dist-info}/RECORD +10 -10
- {upgini-1.1.287a3232.post1.dist-info → upgini-1.1.288a0.dist-info}/WHEEL +0 -0
- {upgini-1.1.287a3232.post1.dist-info → upgini-1.1.288a0.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.287a3232.post1"
|
|
1
|
+
__version__ = "1.1.288a0"
|
upgini/autofe/date.py
CHANGED
|
@@ -2,7 +2,6 @@ from typing import Any, Dict, List, Optional, Union
|
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
5
|
-
import datetime
|
|
6
5
|
from pandas.core.arrays.timedeltas import TimedeltaArray
|
|
7
6
|
from pydantic import BaseModel, validator
|
|
8
7
|
|
|
@@ -22,20 +21,6 @@ class DateDiffMixin(BaseModel):
|
|
|
22
21
|
|
|
23
22
|
return pd.to_datetime(x, unit=unit)
|
|
24
23
|
|
|
25
|
-
def _convert_diff_to_unit(self, diff: Union[pd.Series, TimedeltaArray]) -> Union[pd.Series, TimedeltaArray]:
|
|
26
|
-
if self.diff_unit == "M":
|
|
27
|
-
raise Exception("Unsupported difference unit: Month")
|
|
28
|
-
elif self.diff_unit == "D":
|
|
29
|
-
if isinstance(diff, pd.Series) and diff.dtype == "object":
|
|
30
|
-
return diff.apply(lambda x: None if isinstance(x, float) and np.isnan(x) else x.days)
|
|
31
|
-
else:
|
|
32
|
-
return diff / np.timedelta64(1, self.diff_unit)
|
|
33
|
-
elif self.diff_unit == "Y":
|
|
34
|
-
if isinstance(diff, TimedeltaArray):
|
|
35
|
-
return (diff / 365 / 24 / 60 / 60 / 10**9).astype(int)
|
|
36
|
-
else:
|
|
37
|
-
return (diff / 365 / 24 / 60 / 60 / 10**9).dt.nanoseconds
|
|
38
|
-
|
|
39
24
|
|
|
40
25
|
class DateDiff(PandasOperand, DateDiffMixin):
|
|
41
26
|
name = "date_diff"
|
|
@@ -56,8 +41,7 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
56
41
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
57
42
|
left = self._convert_to_date(left, self.left_unit)
|
|
58
43
|
right = self._convert_to_date(right, self.right_unit)
|
|
59
|
-
|
|
60
|
-
return self.__replace_negative(diff)
|
|
44
|
+
return self.__replace_negative((left - right) / np.timedelta64(1, self.diff_unit))
|
|
61
45
|
|
|
62
46
|
def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
|
|
63
47
|
x[x < 0] = None
|
|
@@ -123,7 +107,12 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
123
107
|
return pd.Series(left - right.values).apply(lambda x: self._agg(self._diff(x)))
|
|
124
108
|
|
|
125
109
|
def _diff(self, x: TimedeltaArray):
|
|
126
|
-
|
|
110
|
+
if self.diff_unit == "Y":
|
|
111
|
+
x = (x / 365 / 24 / 60 / 60 / 10**9).astype(int)
|
|
112
|
+
elif self.diff_unit == "M":
|
|
113
|
+
raise Exception("Unsupported difference unit: Month")
|
|
114
|
+
else:
|
|
115
|
+
x = x / np.timedelta64(1, self.diff_unit)
|
|
127
116
|
return x[x > 0]
|
|
128
117
|
|
|
129
118
|
def _agg(self, x):
|
|
upgini/data_source/data_source_publisher.py
CHANGED
|
@@ -58,6 +58,7 @@ class DataSourcePublisher:
|
|
|
58
58
|
join_date_abs_limit_days: Optional[int] = None,
|
|
59
59
|
features_for_embeddings: Optional[List[str]] = DEFAULT_GENERATE_EMBEDDINGS,
|
|
60
60
|
data_table_id_to_replace: Optional[str] = None,
|
|
61
|
+
keep_features: Optional[List[str]] = None,
|
|
61
62
|
_force_generation=False,
|
|
62
63
|
_silent=False,
|
|
63
64
|
) -> str:
|
|
@@ -116,6 +117,8 @@ class DataSourcePublisher:
|
|
|
116
117
|
request["adsDefinitionIdToReplace"] = data_table_id_to_replace
|
|
117
118
|
if exclude_from_autofe_generation is not None:
|
|
118
119
|
request["excludeFromGeneration"] = exclude_from_autofe_generation
|
|
120
|
+
if keep_features is not None:
|
|
121
|
+
request["keepFeatures"] = keep_features
|
|
119
122
|
self.logger.info(f"Start registering data table {request}")
|
|
120
123
|
|
|
121
124
|
task_id = self._rest_client.register_ads(request, trace_id)
|
upgini/features_enricher.py
CHANGED
|
@@ -2596,9 +2596,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2596
2596
|
return validated_X
|
|
2597
2597
|
|
|
2598
2598
|
def _validate_y(self, X: pd.DataFrame, y) -> pd.Series:
|
|
2599
|
-
if _num_samples(y) == 0:
|
|
2600
|
-
raise ValidationError(self.bundle.get("y_is_empty"))
|
|
2601
|
-
|
|
2602
2599
|
if (
|
|
2603
2600
|
not isinstance(y, pd.Series)
|
|
2604
2601
|
and not isinstance(y, pd.DataFrame)
|
|
@@ -2607,6 +2604,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2607
2604
|
):
|
|
2608
2605
|
raise ValidationError(self.bundle.get("unsupported_y_type").format(type(y)))
|
|
2609
2606
|
|
|
2607
|
+
if _num_samples(y) == 0:
|
|
2608
|
+
raise ValidationError(self.bundle.get("y_is_empty"))
|
|
2609
|
+
|
|
2610
2610
|
if _num_samples(X) != _num_samples(y):
|
|
2611
2611
|
raise ValidationError(self.bundle.get("x_and_y_diff_size").format(_num_samples(X), _num_samples(y)))
|
|
2612
2612
|
|
|
upgini/resource_bundle/strings.properties
CHANGED
|
@@ -81,7 +81,7 @@ date_and_datetime_simultanious=DATE and DATETIME search keys cannot be used simu
|
|
|
81
81
|
email_and_hem_simultanious=EMAIL and HEM search keys cannot be used simultaneously. Choose one to keep
|
|
82
82
|
postal_code_without_country=COUNTRY search key required if POSTAL_CODE is present
|
|
83
83
|
multiple_search_key=Search key {} passed multiple times
|
|
84
|
-
unregistered_only_personal_keys=Only personal search keys used. Api_key from profile.upgini.com required for EMAIL/HEM, PHONE NUMBER or IPv4 search keys\nSee docs https://github.com/upgini/upgini#-open-up-all-capabilities-of-upgini
|
|
84
|
+
unregistered_only_personal_keys=Only personal search keys used. Api_key from profile.upgini.com required for EMAIL/HEM, PHONE NUMBER or IPv4/IPv6 search keys\nSee docs https://github.com/upgini/upgini#-open-up-all-capabilities-of-upgini
|
|
85
85
|
search_key_not_found=Column `{}` from search_keys was not found in X dataframe: {}
|
|
86
86
|
numeric_search_key_not_found=Index {} in search_keys is out of bounds for {} columns of X dataframe
|
|
87
87
|
unsupported_search_key_type=Unsupported type of key in search_keys: {}
|
upgini/utils/sklearn_ext.py
CHANGED
|
{upgini-1.1.287a3232.post1.dist-info → upgini-1.1.288a0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.287a3232.post1
|
|
3
|
+
Version: 1.1.288a0
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
{upgini-1.1.287a3232.post1.dist-info → upgini-1.1.288a0.dist-info}/RECORD
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=3Qkh5WTdySU-oJImISkkJO-aROpU4gchsmPuaaEbmuU,26
|
|
2
2
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=7TLVVhGtjgx_9yaiaIUK3kZSe_R9wg5dY0d4F5qCGM4,45636
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=Bfqbzqj3h1Ox3s6hBA4UYXkG7hQDqGPB4JK1YXtxXxw,177530
|
|
7
7
|
upgini/http.py,sha256=khrYSldpY-HbVLCcApfV1BjBFK6Uyuatb4colKybxgY,42301
|
|
8
8
|
upgini/metadata.py,sha256=qDAIO7NLSSQp_XiXCv3U4XJTLO0KH3YuQ8lvCLYPqzs,9781
|
|
9
9
|
upgini/metrics.py,sha256=DLvA2YLV4f7lnzBCcfZ5T4NkqAv3pbstbjTepavuT7U,30688
|
|
@@ -15,21 +15,21 @@ upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo
|
|
|
15
15
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
upgini/autofe/all_operands.py,sha256=7UyvmmqGSqQu4kDgoFwQRKY__b9xKDk3Fpp2-H8A7AA,2399
|
|
17
17
|
upgini/autofe/binary.py,sha256=441BRuqMsxlxuw4c8rMZB6h5EpRdVMk-bVa03U7T5Hg,3973
|
|
18
|
-
upgini/autofe/date.py,sha256=
|
|
18
|
+
upgini/autofe/date.py,sha256=w0C2n261Uzd9sEk3s7QdDrXLZBWv6Vv7EBuv0W1g-LU,6738
|
|
19
19
|
upgini/autofe/feature.py,sha256=_V9B74B3ue7eAYXSOt9JKhVC9klkAKks22MwnBRye_w,12487
|
|
20
20
|
upgini/autofe/groupby.py,sha256=4WjDzQxqpZxB79Ih4ihMMI5GDxaFqiH6ZelfV82ClT4,3091
|
|
21
21
|
upgini/autofe/operand.py,sha256=JjEVT1U3kY9NDjUPMdoki7Oa8hMDG0-_h_NklVjIFyc,2882
|
|
22
22
|
upgini/autofe/unary.py,sha256=v-l3aiE5hj6kurvh6adCQL8W3X9u9a7RVbS_WPR2qlw,3146
|
|
23
23
|
upgini/autofe/vector.py,sha256=dLxfAstJs-gw_OQ1xxoxcM6pVzORlV0HVzdzt7cLXVQ,606
|
|
24
24
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
upgini/data_source/data_source_publisher.py,sha256=
|
|
25
|
+
upgini/data_source/data_source_publisher.py,sha256=B4fJ1owDCF5ZZ0Ca9ywi_CXVt4iPvABh5BGTnXdXmHk,16635
|
|
26
26
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
27
27
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
28
28
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
29
|
upgini/normalizer/phone_normalizer.py,sha256=EzTaahk6myRv6ZXgbyVFGY4kpo_2VlQgOrm5_lfbmNI,9996
|
|
30
30
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
31
31
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
32
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
32
|
+
upgini/resource_bundle/strings.properties,sha256=1oHurL4I83P2lXIavx9vSdKM8ZqncAPXH2IZf76bD6g,26292
|
|
33
33
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
34
34
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
35
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
@@ -52,11 +52,11 @@ upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
|
|
|
52
52
|
upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
|
|
53
53
|
upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
|
|
54
54
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
55
|
-
upgini/utils/sklearn_ext.py,sha256=
|
|
55
|
+
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
56
56
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
57
57
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
58
58
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
59
|
-
upgini-1.1.287a3232.post1.dist-info/METADATA,sha256=
|
|
60
|
-
upgini-1.1.287a3232.post1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
61
|
-
upgini-1.1.287a3232.post1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
62
|
-
upgini-1.1.287a3232.post1.dist-info/RECORD,,
|
|
59
|
+
upgini-1.1.288a0.dist-info/METADATA,sha256=E4zY2U029vSoJLOe5NFkIxJ5_Loj342ORFXKIYI8BjY,48119
|
|
60
|
+
upgini-1.1.288a0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
61
|
+
upgini-1.1.288a0.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
62
|
+
upgini-1.1.288a0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|