upgini 1.2.113a3974.dev2__py3-none-any.whl → 1.2.114a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.113a3974.dev2
3
+ Version: 1.2.114a2
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -270,9 +270,9 @@ from upgini.metadata import SearchKey
270
270
  enricher = FeaturesEnricher(
271
271
  search_keys={
272
272
  "subscription_activation_date": SearchKey.DATE,
273
- "country": SearchKey.COUNTRY,
274
- "zip_code": SearchKey.POSTAL_CODE,
275
- "hashed_email": SearchKey.HEM,
273
+ "country": SearchKey.COUNTRY,
274
+ "zip_code": SearchKey.POSTAL_CODE,
275
+ "hashed_email": SearchKey.HEM,
276
276
  "last_visit_ip_address": SearchKey.IP,
277
277
  "registered_with_phone": SearchKey.PHONE
278
278
  })
@@ -358,9 +358,9 @@ from upgini.metadata import SearchKey
358
358
  enricher = FeaturesEnricher(
359
359
  search_keys={
360
360
  "subscription_activation_date": SearchKey.DATE,
361
- "country": SearchKey.COUNTRY,
362
- "zip_code": SearchKey.POSTAL_CODE,
363
- "hashed_email": SearchKey.HEM,
361
+ "country": SearchKey.COUNTRY,
362
+ "zip_code": SearchKey.POSTAL_CODE,
363
+ "hashed_email": SearchKey.HEM,
364
364
  "last_visit_ip_address": SearchKey.IP,
365
365
  "registered_with_phone": SearchKey.PHONE
366
366
  },
@@ -381,7 +381,7 @@ from upgini.metadata import SearchKey
381
381
  enricher = FeaturesEnricher(
382
382
  search_keys={
383
383
  "subscription_activation_date": SearchKey.DATE,
384
- "zip_code": SearchKey.POSTAL_CODE,
384
+ "zip_code": SearchKey.POSTAL_CODE,
385
385
  },
386
386
  country_code = "US",
387
387
  date_format = "%Y-%d-%m"
@@ -409,8 +409,8 @@ y = train_df["churn_flag"]
409
409
  enricher = FeaturesEnricher(
410
410
  search_keys={
411
411
  "subscription_activation_date": SearchKey.DATE,
412
- "country": SearchKey.COUNTRY,
413
- "zip_code": SearchKey.POSTAL_CODE
412
+ "country": SearchKey.COUNTRY,
413
+ "zip_code": SearchKey.POSTAL_CODE
414
414
  })
415
415
 
416
416
  # everything is ready to fit! For 200k records fitting should take around 10 minutes,
@@ -464,8 +464,8 @@ And then, for `transform` in a production ML pipeline, you'll get enrichment wit
464
464
  enricher = FeaturesEnricher(
465
465
  search_keys={
466
466
  "subscription_activation_date": SearchKey.DATE,
467
- "country": SearchKey.COUNTRY,
468
- "zip_code": SearchKey.POSTAL_CODE,
467
+ "country": SearchKey.COUNTRY,
468
+ "zip_code": SearchKey.POSTAL_CODE,
469
469
  },
470
470
  )
471
471
  ```
@@ -516,8 +516,8 @@ enricher = FeaturesEnricher(
516
516
  If you're working with multivariate time series, you should specify id columns of individual univariate series in `FeaturesEnricher`. For example, if you have a dataset predicting sales for different stores and products, you should specify store and product id columns as follows:
517
517
  ```python
518
518
  enricher = FeaturesEnricher(
519
- search_keys={
520
- "sales_date": SearchKey.DATE,
519
+ search_keys={
520
+ "sales_date": SearchKey.DATE,
521
521
  },
522
522
  id_columns=["store_id", "product_id"],
523
523
  cv=CVType.time_series
@@ -733,9 +733,22 @@ enricher.fit(
733
733
  )
734
734
  ```
735
735
  #### ⚠️ Requirements for out-of-time dataset
736
- - Same data schema as for search initialization dataset
736
+ - Same data schema as for search initialization X dataset
737
737
  - Pandas dataframe representation
738
738
 
739
+ There are 3 options to pass an out-of-time dataset without labels:
740
+ ```python
741
+ enricher.fit(
742
+ train_ids_and_features,
743
+ train_label,
744
+ eval_set = [
745
+ (eval_ids_and_features_1,), # Just tuple of 1 element
746
+ (eval_ids_and_features_2, None), # None as labels
747
+ (eval_ids_and_features_3, [np.nan] * len(eval_ids_and_features_3)), # List or Series of the same size as eval X
748
+ ]
749
+ )
750
+ ```
751
+
739
752
  ### Use custom loss function in feature selection & metrics calculation
740
753
 
741
754
  `FeaturesEnricher` can be initialized with additional string parameter `loss`.
@@ -797,7 +810,7 @@ enricher = FeaturesEnricher(
797
810
  enricher.fit(X, y)
798
811
  ```
799
812
 
800
- ## Turn off removing of target outliers
813
+ ### Turn off removing of target outliers
801
814
  Upgini detects rows with target outliers for regression tasks. By default, such rows are dropped during metrics calculation. To turn off the removal of target outlier rows, use the parameter `remove_outliers_calc_metrics=False` in the `fit`, `fit_transform` or `calculate_metrics` methods:
802
815
 
803
816
  ```python
@@ -808,7 +821,7 @@ enricher = FeaturesEnricher(
808
821
  enricher.fit(X, y, remove_outliers_calc_metrics=False)
809
822
  ```
810
823
 
811
- ## Turn off generating features on search keys
824
+ ### Turn off generating features on search keys
812
825
  Upgini tries to generate features from email, date and datetime search keys. This generation is enabled by default. To disable it, set the `generate_search_key_features` parameter of the `FeaturesEnricher` constructor to `False`:
813
826
 
814
827
  ```python
@@ -816,6 +829,7 @@ enricher = FeaturesEnricher(
816
829
  search_keys={"date": SearchKey.DATE},
817
830
  generate_search_key_features=False,
818
831
  )
832
+ ```
819
833
 
820
834
  ## 🔑 Open up all capabilities of Upgini
821
835
 
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=ziYMT-cCb1zPGJYidvejUtxXlUCjQLvR25p82kAy21c,34
1
+ upgini/__about__.py,sha256=4kFyaxTINIEZUA2muRTgIvBBR0-MXqqL6_K0IAyiz-U,26
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
- upgini/dataset.py,sha256=xFi0a-A3uvtxVwFM6JOyitkEPd1I2slIBj5SWfys3hQ,32724
4
+ upgini/dataset.py,sha256=e0XVWXKO-gE6tUOdx-E0cl9M0fj4io4D0WoX_xoJJP4,34416
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=rfVdHgUYEq9saqhWcI04jUmNQcAAn5Kto4w3WpxlOpA,221762
6
+ upgini/features_enricher.py,sha256=f9VD6T4gRnE7huBjck3f3JeoGo4PY87c-KcflmFEkNQ,236427
7
7
  upgini/http.py,sha256=zeAZvT6IAzOs9jQ3WG8mJBANLajgvv2LZePFzKz004w,45482
8
- upgini/metadata.py,sha256=9_0lFEWPpIHRBW-xWYSEcwPzICTC6_bQ6dUUlE75Xns,12773
9
- upgini/metrics.py,sha256=V2SP6NS5bfFHzRqufeKVsCXME1yG4t_8Dmk2E3zKdYk,45715
8
+ upgini/metadata.py,sha256=sx4X9fPkyCgXB6FPk9Rq_S1Kx8ibkbaWA-qNDVCuSmg,12811
9
+ upgini/metrics.py,sha256=gjJDtlV6JrhUJumbNipdzjY4ojEupHGPihb9_VxjtWc,45939
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -38,11 +38,11 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
38
38
  upgini/normalizer/normalize_utils.py,sha256=mDh2mBW3aQMB4EFP2aHbf2dGMVkOcWnp4sKKvKDBh8w,8511
39
39
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
40
40
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
41
- upgini/resource_bundle/strings.properties,sha256=NyxRwzehkrL5LMoVyjkhN811MvalepavNfjlC9ubE0Q,28677
41
+ upgini/resource_bundle/strings.properties,sha256=wdre7HIk_vAqsKkRN3GdT9eiiiIXFMVQfgCIGgEsaQk,29315
42
42
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
43
43
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
45
- upgini/sampler/random_under_sampler.py,sha256=TIbm7ATo-bCMF-IiS5sZeDC1ad1SYg0eY_rRmg84yIQ,4024
44
+ upgini/sampler/base.py,sha256=Fva2FEhLiNRPZ9Q6uOtJRtRzwsayjv7aphalAZO_4lc,6452
45
+ upgini/sampler/random_under_sampler.py,sha256=4mofmaRTmNwT_HqxecWJyfXdLKK0h9jMBwS46xdrIqE,4356
46
46
  upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
47
47
  upgini/utils/Roboto-Regular.ttf,sha256=kqYnZjMRQMpbyLulIChCLSdgYa1XF8GsUIoRi2Gcauw,168260
48
48
  upgini/utils/__init__.py,sha256=O_KgzKiJjW3g4NoqZ7lAxUpoHcBi_gze6r3ndEjCH74,842
@@ -52,11 +52,11 @@ upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk
52
52
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
53
53
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
54
54
  upgini/utils/datetime_utils.py,sha256=UL1ernnawW0LV9mPDpCIc6sFy0HUhFscWVNwfH4V7rI,14366
55
- upgini/utils/deduplicate_utils.py,sha256=EpBVCov42-FJIAPfa4jY_ZRct3N2MFaC7i-oJNZ_MGI,8954
55
+ upgini/utils/deduplicate_utils.py,sha256=oZEiZeN-A92zwAPysV4OP9hO-niC2RLt-Dhc_hynBTU,11273
56
56
  upgini/utils/display_utils.py,sha256=Ou7dYdgvvdh443OgOLTM_xKwC2ITx9DQrpKoC2vCRYc,11856
57
57
  upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
58
58
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
59
- upgini/utils/feature_info.py,sha256=b3RvAeOHSEu-ZXWTrf42Dll_3ZUBL0pw7sdk7hgUKD0,7284
59
+ upgini/utils/feature_info.py,sha256=6vihytwKma_TlXtTn4l6Aj4kqlOj0ouLy-yWVV6VUw8,7551
60
60
  upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
61
61
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
62
62
  upgini/utils/ip_utils.py,sha256=wmnnwVQdjX9o1cNQw6VQMk6maHhvsq6hNsZBYf9knrw,6585
@@ -64,14 +64,15 @@ upgini/utils/mstats.py,sha256=u3gQVUtDRbyrOQK6V1UJ2Rx1QbkSNYGjXa6m3Z_dPVs,6286
64
64
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
65
65
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
66
66
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
67
- upgini/utils/sample_utils.py,sha256=lZJ4yf9Jiq9Em2Ny9m3RIiF7WSxBPrc4E3xxn_8sQk8,15417
67
+ upgini/utils/psi.py,sha256=pLtECcCeco_WRqMjFnQvhUB4vHArjHtD5HzJFP9ICMc,10972
68
+ upgini/utils/sample_utils.py,sha256=_bA6KrPiwtVELsyXUmzwMWpnJPrVX__LbyUGCCxFNqU,16543
68
69
  upgini/utils/sklearn_ext.py,sha256=jLJWAKkqQinV15Z4y1ZnsN3c-fKFwXTsprs00COnyVU,49315
69
70
  upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
70
- upgini/utils/target_utils.py,sha256=i3Xt5l9ybB2_nF_ma5cfPuL3OeFTs2dY2xDI0p4Azpg,9049
71
+ upgini/utils/target_utils.py,sha256=dd8w1AQjEW-BMfoCVxEZidW1BIX6x1WT7_iwkmYyjRM,9272
71
72
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
72
73
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
73
74
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
74
- upgini-1.2.113a3974.dev2.dist-info/METADATA,sha256=RC2p2RrCBlPWX6hGAcLGtt-k6wOmmq2DFhetxg3LvGk,49539
75
- upgini-1.2.113a3974.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
76
- upgini-1.2.113a3974.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
77
- upgini-1.2.113a3974.dev2.dist-info/RECORD,,
75
+ upgini-1.2.114a2.dist-info/METADATA,sha256=xjgI57bA9XKWyeoDmR-9zt2atbTmt1rh0iCPy-UMxAU,49865
76
+ upgini-1.2.114a2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
77
+ upgini-1.2.114a2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
78
+ upgini-1.2.114a2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.24.2
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any