upgini 1.2.60a3792.dev2__py3-none-any.whl → 1.2.62a3818.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/all_operands.py +2 -2
- upgini/autofe/binary.py +1 -1
- upgini/autofe/date.py +2 -2
- upgini/autofe/feature.py +1 -1
- upgini/autofe/groupby.py +1 -1
- upgini/autofe/{operand.py → operator.py} +2 -2
- upgini/autofe/timeseries.py +200 -0
- upgini/autofe/unary.py +1 -1
- upgini/autofe/vector.py +2 -198
- upgini/data_source/data_source_publisher.py +9 -4
- upgini/features_enricher.py +108 -46
- upgini/metrics.py +4 -7
- upgini/resource_bundle/strings.properties +1 -0
- upgini/utils/datetime_utils.py +2 -0
- upgini/utils/mstats.py +177 -0
- upgini/utils/sort.py +172 -0
- upgini/utils/target_utils.py +3 -3
- upgini/utils/ts_utils.py +0 -6
- {upgini-1.2.60a3792.dev2.dist-info → upgini-1.2.62a3818.dev1.dist-info}/METADATA +2 -1
- {upgini-1.2.60a3792.dev2.dist-info → upgini-1.2.62a3818.dev1.dist-info}/RECORD +23 -20
- {upgini-1.2.60a3792.dev2.dist-info → upgini-1.2.62a3818.dev1.dist-info}/WHEEL +0 -0
- {upgini-1.2.60a3792.dev2.dist-info → upgini-1.2.62a3818.dev1.dist-info}/licenses/LICENSE +0 -0
upgini/utils/target_utils.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import itertools
|
|
2
1
|
import logging
|
|
3
2
|
from typing import Callable, List, Optional, Union
|
|
4
3
|
|
|
@@ -208,7 +207,7 @@ def balance_undersample_forced(
|
|
|
208
207
|
id_columns: List[str],
|
|
209
208
|
date_column: str,
|
|
210
209
|
task_type: ModelTaskType,
|
|
211
|
-
cv_type: CVType,
|
|
210
|
+
cv_type: Optional[CVType],
|
|
212
211
|
random_state: int,
|
|
213
212
|
sample_size: int = 7000,
|
|
214
213
|
logger: Optional[logging.Logger] = None,
|
|
@@ -372,7 +371,8 @@ def balance_undersample_time_series(
|
|
|
372
371
|
if len(id_counts) < min_different_ids:
|
|
373
372
|
if logger is not None:
|
|
374
373
|
logger.info(
|
|
375
|
-
f"Different ids count {len(id_counts)} for sample size {sample_size}
|
|
374
|
+
f"Different ids count {len(id_counts)} for sample size {sample_size}"
|
|
375
|
+
f" is less than min different ids {min_different_ids}, sampling time window"
|
|
376
376
|
)
|
|
377
377
|
date_counts = df.groupby(id_columns)[date_column].nunique().sort_values(ascending=False)
|
|
378
378
|
ids_to_sample = date_counts.index[:min_different_ids] if len(id_counts) > 0 else date_counts.index
|
upgini/utils/ts_utils.py
CHANGED
|
@@ -8,23 +8,17 @@ def get_most_frequent_time_unit(df: pd.DataFrame, id_columns: List[str], date_co
|
|
|
8
8
|
def closest_unit(diff):
|
|
9
9
|
return pd.tseries.frequencies.to_offset(pd.Timedelta(diff, unit="s"))
|
|
10
10
|
|
|
11
|
-
# Calculate differences for each ID group
|
|
12
11
|
all_diffs = []
|
|
13
12
|
groups = df.groupby(id_columns) if id_columns else [(None, df)]
|
|
14
13
|
for _, group in groups:
|
|
15
|
-
# Get sorted dates for this group
|
|
16
14
|
group_dates = group[date_column].sort_values().unique()
|
|
17
15
|
if len(group_dates) > 1:
|
|
18
|
-
# Calculate time differences between consecutive dates
|
|
19
16
|
diff_series = pd.Series(group_dates[1:] - group_dates[:-1])
|
|
20
|
-
# Convert to nanoseconds
|
|
21
17
|
diff_ns = diff_series.dt.total_seconds()
|
|
22
18
|
all_diffs.extend(diff_ns)
|
|
23
19
|
|
|
24
|
-
# Convert to series for easier processing
|
|
25
20
|
all_diffs = pd.Series(all_diffs)
|
|
26
21
|
|
|
27
|
-
# Get most common time unit across all groups
|
|
28
22
|
most_frequent_unit = all_diffs.apply(closest_unit).mode().min()
|
|
29
23
|
|
|
30
24
|
return most_frequent_unit if isinstance(most_frequent_unit, pd.DateOffset) else None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.60a3792.dev2
|
|
3
|
+
Version: 1.2.62a3818.dev1
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -30,6 +30,7 @@ Requires-Dist: jarowinkler>=2.0.0
|
|
|
30
30
|
Requires-Dist: levenshtein>=0.25.1
|
|
31
31
|
Requires-Dist: numpy<=1.26.4,>=1.19.0
|
|
32
32
|
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
33
|
+
Requires-Dist: psutil>=6.0.0
|
|
33
34
|
Requires-Dist: pydantic<3.0.0,>1.0.0
|
|
34
35
|
Requires-Dist: pyjwt>=2.8.0
|
|
35
36
|
Requires-Dist: python-bidi==0.4.2
|
|
@@ -1,36 +1,37 @@
|
|
|
1
|
-
upgini/__about__.py,sha256
|
|
1
|
+
upgini/__about__.py,sha256=-inFSOjK0otU7oAU9xIxafvjGaGWyHQqEAz5nWw5yqI,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=OGjpeFHbj3lWiZTOHTpWEoMMDmFY1FlNC44FKktoZvU,34956
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=cB2I5rNpbztjkYEEW5aJuKj2fCMnfxp40X4Eo63oyuQ,205340
|
|
7
7
|
upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
|
|
10
|
-
upgini/metrics.py,sha256=
|
|
10
|
+
upgini/metrics.py,sha256=t7uOOnlDYvP6E3DLjPMQcFBjyhJfUQY8aUlx7N0Mh-s,35477
|
|
11
11
|
upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
|
|
12
12
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
13
13
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
|
14
14
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
15
15
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
|
16
16
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
upgini/autofe/all_operands.py,sha256=
|
|
18
|
-
upgini/autofe/binary.py,sha256=
|
|
19
|
-
upgini/autofe/date.py,sha256=
|
|
20
|
-
upgini/autofe/feature.py,sha256=
|
|
21
|
-
upgini/autofe/groupby.py,sha256=
|
|
22
|
-
upgini/autofe/
|
|
23
|
-
upgini/autofe/
|
|
24
|
-
upgini/autofe/
|
|
17
|
+
upgini/autofe/all_operands.py,sha256=VIT5jCq5U-qypdNz1MIQ_hlIAs0ujJgRfKRUkU24nFs,332
|
|
18
|
+
upgini/autofe/binary.py,sha256=jsXa_zwlNWRmQAT5qipzU2Or03qae-a1kkY9yDECkq8,7660
|
|
19
|
+
upgini/autofe/date.py,sha256=bmoXU5vlDa1xsfCIFEC_VMRHOnV8Sy_KUMshqh0ARvA,10722
|
|
20
|
+
upgini/autofe/feature.py,sha256=n4sNNFM9b022AGJbW14AMRuERD9bwub-RWqa6hfLID0,14750
|
|
21
|
+
upgini/autofe/groupby.py,sha256=NN0T-tYbTHQDeCi2UZ06wVkDflm8DJBV4rdGrrVyVEE,3596
|
|
22
|
+
upgini/autofe/operator.py,sha256=VCGDUQ5bOtwX-jzmgHDrKF3GbglDumyEkvtLWTmSGQo,4776
|
|
23
|
+
upgini/autofe/timeseries.py,sha256=Pci7kNpFcViNZdIHlVTyxjoxzcMVdqUPopbPrJ3hE20,6593
|
|
24
|
+
upgini/autofe/unary.py,sha256=my7AYIrWCQPFxRtcphONmwieU5HpX4fHiKllFRCsMUk,4647
|
|
25
|
+
upgini/autofe/vector.py,sha256=5Lx2q_Np9PrMtZ_8O86xywq0s4XSQbooHxK3ufo3ANU,664
|
|
25
26
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
-
upgini/data_source/data_source_publisher.py,sha256=
|
|
27
|
+
upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
|
|
27
28
|
upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
|
|
28
29
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
29
30
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
31
|
upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
|
|
31
32
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
32
33
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
33
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
34
|
+
upgini/resource_bundle/strings.properties,sha256=3zctRNQDJ1STTvLUfryBT72wYeHYnrllV4rG1C3HtfI,27542
|
|
34
35
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
35
36
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
37
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
@@ -43,7 +44,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
|
|
|
43
44
|
upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
|
|
44
45
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
45
46
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
46
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
47
|
+
upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
|
|
47
48
|
upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
|
|
48
49
|
upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
|
|
49
50
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
|
@@ -52,15 +53,17 @@ upgini/utils/feature_info.py,sha256=0rOXSyCj-sw-8migWP0ge8qrOzGU50dQvH0JUJUrDfQ,
|
|
|
52
53
|
upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
|
|
53
54
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
54
55
|
upgini/utils/ip_utils.py,sha256=TSQ_qDsLlVnm09X1HacpabEf_HNqSWpxBF4Sdc2xs08,6580
|
|
56
|
+
upgini/utils/mstats.py,sha256=GjBAUacgfAoVQVFUrMiRYdVkmx93CIThLRNvYLLiV48,5765
|
|
55
57
|
upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
|
|
56
58
|
upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
|
|
57
59
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
58
60
|
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
59
|
-
upgini/utils/
|
|
61
|
+
upgini/utils/sort.py,sha256=w-CoT33W_53ekOROpKI_VRsRmiyWNr2b3IpE5_4MLLA,6395
|
|
62
|
+
upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,16579
|
|
60
63
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
|
-
upgini/utils/ts_utils.py,sha256=
|
|
64
|
+
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
|
62
65
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
63
|
-
upgini-1.2.
|
|
64
|
-
upgini-1.2.
|
|
65
|
-
upgini-1.2.
|
|
66
|
-
upgini-1.2.
|
|
66
|
+
upgini-1.2.62a3818.dev1.dist-info/METADATA,sha256=9mRM2yQ18CeOTHQ83UgVmItZ-npsZSla3illeXSpyTQ,49094
|
|
67
|
+
upgini-1.2.62a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
68
|
+
upgini-1.2.62a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
69
|
+
upgini-1.2.62a3818.dev1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|