upgini 1.2.60a3792.dev2__py3-none-any.whl → 1.2.62a3818.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

@@ -1,4 +1,3 @@
1
- import itertools
2
1
  import logging
3
2
  from typing import Callable, List, Optional, Union
4
3
 
@@ -208,7 +207,7 @@ def balance_undersample_forced(
208
207
  id_columns: List[str],
209
208
  date_column: str,
210
209
  task_type: ModelTaskType,
211
- cv_type: CVType | None,
210
+ cv_type: Optional[CVType],
212
211
  random_state: int,
213
212
  sample_size: int = 7000,
214
213
  logger: Optional[logging.Logger] = None,
@@ -372,7 +371,8 @@ def balance_undersample_time_series(
372
371
  if len(id_counts) < min_different_ids:
373
372
  if logger is not None:
374
373
  logger.info(
375
- f"Different ids count {len(id_counts)} for sample size {sample_size} is less than min different ids {min_different_ids}, sampling time window"
374
+ f"Different ids count {len(id_counts)} for sample size {sample_size}"
375
+ f" is less than min different ids {min_different_ids}, sampling time window"
376
376
  )
377
377
  date_counts = df.groupby(id_columns)[date_column].nunique().sort_values(ascending=False)
378
378
  ids_to_sample = date_counts.index[:min_different_ids] if len(id_counts) > 0 else date_counts.index
upgini/utils/ts_utils.py CHANGED
@@ -8,23 +8,17 @@ def get_most_frequent_time_unit(df: pd.DataFrame, id_columns: List[str], date_co
8
8
  def closest_unit(diff):
9
9
  return pd.tseries.frequencies.to_offset(pd.Timedelta(diff, unit="s"))
10
10
 
11
- # Calculate differences for each ID group
12
11
  all_diffs = []
13
12
  groups = df.groupby(id_columns) if id_columns else [(None, df)]
14
13
  for _, group in groups:
15
- # Get sorted dates for this group
16
14
  group_dates = group[date_column].sort_values().unique()
17
15
  if len(group_dates) > 1:
18
- # Calculate time differences between consecutive dates
19
16
  diff_series = pd.Series(group_dates[1:] - group_dates[:-1])
20
- # Convert to nanoseconds
21
17
  diff_ns = diff_series.dt.total_seconds()
22
18
  all_diffs.extend(diff_ns)
23
19
 
24
- # Convert to series for easier processing
25
20
  all_diffs = pd.Series(all_diffs)
26
21
 
27
- # Get most common time unit across all groups
28
22
  most_frequent_unit = all_diffs.apply(closest_unit).mode().min()
29
23
 
30
24
  return most_frequent_unit if isinstance(most_frequent_unit, pd.DateOffset) else None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.60a3792.dev2
3
+ Version: 1.2.62a3818.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -30,6 +30,7 @@ Requires-Dist: jarowinkler>=2.0.0
30
30
  Requires-Dist: levenshtein>=0.25.1
31
31
  Requires-Dist: numpy<=1.26.4,>=1.19.0
32
32
  Requires-Dist: pandas<3.0.0,>=1.1.0
33
+ Requires-Dist: psutil>=6.0.0
33
34
  Requires-Dist: pydantic<3.0.0,>1.0.0
34
35
  Requires-Dist: pyjwt>=2.8.0
35
36
  Requires-Dist: python-bidi==0.4.2
@@ -1,36 +1,37 @@
1
- upgini/__about__.py,sha256=8VHknGDFZaUmYuHXftocfi6wQkP1htM0HF9T5bSV43M,33
1
+ upgini/__about__.py,sha256=-inFSOjK0otU7oAU9xIxafvjGaGWyHQqEAz5nWw5yqI,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=OGjpeFHbj3lWiZTOHTpWEoMMDmFY1FlNC44FKktoZvU,34956
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=IXU6ahvQqMGLdZsrHCjOGEia1pBAgixfld3pNVPcGEM,202468
6
+ upgini/features_enricher.py,sha256=cB2I5rNpbztjkYEEW5aJuKj2fCMnfxp40X4Eo63oyuQ,205340
7
7
  upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
10
- upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
10
+ upgini/metrics.py,sha256=t7uOOnlDYvP6E3DLjPMQcFBjyhJfUQY8aUlx7N0Mh-s,35477
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
14
14
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
15
15
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
16
16
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- upgini/autofe/all_operands.py,sha256=v0_NozalvvzeojSAA0d7UJ5INS654ZVaLn4S8djK6Ac,329
18
- upgini/autofe/binary.py,sha256=zMhtHVuGUAFLUqem-XiXqJj-GRXxS88tdz8tFuDfSNM,7659
19
- upgini/autofe/date.py,sha256=oykxfmny4LOr6m79IipOUCtk2JQSUdSCWHh8K9n7nek,10726
20
- upgini/autofe/feature.py,sha256=zvRdlxCkaOsX0XiragNvh0tAPyOWut0MQTq5JGU5HtY,14749
21
- upgini/autofe/groupby.py,sha256=G48_sQZw016eGx3cOy8YQrEIOp95puWqYUpFWd-gdeM,3595
22
- upgini/autofe/operand.py,sha256=8Ttrfxv_H91dMbS7J55zxluzAJHfGXU_Y2xCh4OHwb8,4774
23
- upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
24
- upgini/autofe/vector.py,sha256=udkg4pP7IIeLjt0Cg6rzEKUmGaubOnqsEz3bz9R6E44,7110
17
+ upgini/autofe/all_operands.py,sha256=VIT5jCq5U-qypdNz1MIQ_hlIAs0ujJgRfKRUkU24nFs,332
18
+ upgini/autofe/binary.py,sha256=jsXa_zwlNWRmQAT5qipzU2Or03qae-a1kkY9yDECkq8,7660
19
+ upgini/autofe/date.py,sha256=bmoXU5vlDa1xsfCIFEC_VMRHOnV8Sy_KUMshqh0ARvA,10722
20
+ upgini/autofe/feature.py,sha256=n4sNNFM9b022AGJbW14AMRuERD9bwub-RWqa6hfLID0,14750
21
+ upgini/autofe/groupby.py,sha256=NN0T-tYbTHQDeCi2UZ06wVkDflm8DJBV4rdGrrVyVEE,3596
22
+ upgini/autofe/operator.py,sha256=VCGDUQ5bOtwX-jzmgHDrKF3GbglDumyEkvtLWTmSGQo,4776
23
+ upgini/autofe/timeseries.py,sha256=Pci7kNpFcViNZdIHlVTyxjoxzcMVdqUPopbPrJ3hE20,6593
24
+ upgini/autofe/unary.py,sha256=my7AYIrWCQPFxRtcphONmwieU5HpX4fHiKllFRCsMUk,4647
25
+ upgini/autofe/vector.py,sha256=5Lx2q_Np9PrMtZ_8O86xywq0s4XSQbooHxK3ufo3ANU,664
25
26
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- upgini/data_source/data_source_publisher.py,sha256=0vaYz5v3KclJnA6jAWiTUiMQO5mbBTBINWV9jr2F5xM,22591
27
+ upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
27
28
  upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
28
29
  upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
29
30
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
31
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
31
32
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
32
33
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
33
- upgini/resource_bundle/strings.properties,sha256=UXMiaFP3p-WdiXyZJN3O_OZstb-F33BWVDxDiofyxd4,27464
34
+ upgini/resource_bundle/strings.properties,sha256=3zctRNQDJ1STTvLUfryBT72wYeHYnrllV4rG1C3HtfI,27542
34
35
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
35
36
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
37
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -43,7 +44,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
43
44
  upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
44
45
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
45
46
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
46
- upgini/utils/datetime_utils.py,sha256=RVAk4_rakK8X9zjybK3-rj0to0e3elye8tnBuA4wTWU,13491
47
+ upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
47
48
  upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
48
49
  upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
49
50
  upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
@@ -52,15 +53,17 @@ upgini/utils/feature_info.py,sha256=0rOXSyCj-sw-8migWP0ge8qrOzGU50dQvH0JUJUrDfQ,
52
53
  upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
53
54
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
54
55
  upgini/utils/ip_utils.py,sha256=TSQ_qDsLlVnm09X1HacpabEf_HNqSWpxBF4Sdc2xs08,6580
56
+ upgini/utils/mstats.py,sha256=GjBAUacgfAoVQVFUrMiRYdVkmx93CIThLRNvYLLiV48,5765
55
57
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
56
58
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
57
59
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
58
60
  upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
59
- upgini/utils/target_utils.py,sha256=gGIO40NmLdm1DWaZNPWPoCIPlLNIDZWFGjoPuhOlzAU,16573
61
+ upgini/utils/sort.py,sha256=w-CoT33W_53ekOROpKI_VRsRmiyWNr2b3IpE5_4MLLA,6395
62
+ upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,16579
60
63
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
- upgini/utils/ts_utils.py,sha256=_YbNVE144vtEPlvLpvPGguDNzrnUM9IIjdX2VQz4T7E,1671
64
+ upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
62
65
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
63
- upgini-1.2.60a3792.dev2.dist-info/METADATA,sha256=9ViiZMTzdvTgcXn1tdN-TUgwOW7ovx7JoOQk_VZfMf0,49065
64
- upgini-1.2.60a3792.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
65
- upgini-1.2.60a3792.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
66
- upgini-1.2.60a3792.dev2.dist-info/RECORD,,
66
+ upgini-1.2.62a3818.dev1.dist-info/METADATA,sha256=9mRM2yQ18CeOTHQ83UgVmItZ-npsZSla3illeXSpyTQ,49094
67
+ upgini-1.2.62a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
68
+ upgini-1.2.62a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
69
+ upgini-1.2.62a3818.dev1.dist-info/RECORD,,