upgini 1.2.41a3758.dev1__tar.gz → 1.2.42__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67) hide show
  1. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/PKG-INFO +3 -2
  2. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/README.md +2 -1
  3. upgini-1.2.42/src/upgini/__about__.py +1 -0
  4. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/features_enricher.py +35 -30
  5. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/resource_bundle/strings.properties +1 -1
  6. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/ip_utils.py +5 -5
  7. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/version_validator.py +1 -1
  8. upgini-1.2.41a3758.dev1/src/upgini/__about__.py +0 -1
  9. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/.gitignore +0 -0
  10. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/LICENSE +0 -0
  11. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/pyproject.toml +0 -0
  12. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/__init__.py +0 -0
  13. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/ads.py +0 -0
  14. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/ads_management/__init__.py +0 -0
  15. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/ads_management/ads_manager.py +0 -0
  16. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/__init__.py +0 -0
  17. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/all_operands.py +0 -0
  18. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/binary.py +0 -0
  19. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/date.py +0 -0
  20. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/feature.py +0 -0
  21. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/groupby.py +0 -0
  22. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/operand.py +0 -0
  23. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/unary.py +0 -0
  24. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/autofe/vector.py +0 -0
  25. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/data_source/__init__.py +0 -0
  26. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/data_source/data_source_publisher.py +0 -0
  27. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/dataset.py +0 -0
  28. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/errors.py +0 -0
  29. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/http.py +0 -0
  30. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/lazy_import.py +0 -0
  31. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/mdc/__init__.py +0 -0
  32. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/mdc/context.py +0 -0
  33. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/metadata.py +0 -0
  34. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/metrics.py +0 -0
  35. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/normalizer/__init__.py +0 -0
  36. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/normalizer/normalize_utils.py +0 -0
  37. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/resource_bundle/__init__.py +0 -0
  38. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/resource_bundle/exceptions.py +0 -0
  39. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  40. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/sampler/__init__.py +0 -0
  41. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/sampler/base.py +0 -0
  42. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/sampler/random_under_sampler.py +0 -0
  43. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/sampler/utils.py +0 -0
  44. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/search_task.py +0 -0
  45. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/spinner.py +0 -0
  46. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  47. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/__init__.py +0 -0
  48. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/base_search_key_detector.py +0 -0
  49. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/blocked_time_series.py +0 -0
  50. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/country_utils.py +0 -0
  51. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/custom_loss_utils.py +0 -0
  52. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/cv_utils.py +0 -0
  53. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/datetime_utils.py +0 -0
  54. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/deduplicate_utils.py +0 -0
  55. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/display_utils.py +0 -0
  56. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/email_utils.py +0 -0
  57. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/fallback_progress_bar.py +0 -0
  58. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/feature_info.py +0 -0
  59. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/features_validator.py +0 -0
  60. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/format.py +0 -0
  61. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/phone_utils.py +0 -0
  62. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/postal_code_utils.py +0 -0
  63. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/progress_bar.py +0 -0
  64. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/sklearn_ext.py +0 -0
  65. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/target_utils.py +0 -0
  66. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/track_info.py +0 -0
  67. {upgini-1.2.41a3758.dev1 → upgini-1.2.42}/src/upgini/utils/warning_counter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.41a3758.dev1
3
+ Version: 1.2.42
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -164,11 +164,12 @@ Run [Feature search & generation notebook](https://github.com/upgini/upgini/blob
164
164
 
165
165
  * The goal is to **predict future sales of different goods in stores** based on a 5-year history of sales.
166
166
  * The Kaggle competition [Store Item Demand Forecasting Challenge](https://www.kaggle.com/c/demand-forecasting-kernels-only) is a product sales forecasting task. The evaluation metric is [SMAPE](https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error).
167
- <!--
167
+
168
168
  Run [Simple sales prediction for retail stores](https://github.com/upgini/upgini/blob/main/notebooks/kaggle_example.ipynb) inside your browser:
169
169
 
170
170
  [![Open example in Google Colab](https://img.shields.io/badge/run_example_in-colab-blue?style=for-the-badge&logo=googlecolab)](https://colab.research.google.com/github/upgini/upgini/blob/main/notebooks/kaggle_example.ipynb)
171
171
  &nbsp;
172
+ <!--
172
173
  [![Open in Binder](https://img.shields.io/badge/run_example_in-mybinder-red.svg?style=for-the-badge&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFkAAABZCAMAAABi1XidAAAB8lBMVEX///9XmsrmZYH1olJXmsr1olJXmsrmZYH1olJXmsr1olJXmsrmZYH1olL1olJXmsr1olJXmsrmZYH1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olJXmsrmZYH1olL1olL0nFf1olJXmsrmZYH1olJXmsq8dZb1olJXmsrmZYH1olJXmspXmspXmsr1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olLeaIVXmsrmZYH1olL1olL1olJXmsrmZYH1olLna31Xmsr1olJXmsr1olJXmsrmZYH1olLqoVr1olJXmsr1olJXmsrmZYH1olL1olKkfaPobXvviGabgadXmsqThKuofKHmZ4Dobnr1olJXmsr1olJXmspXmsr1olJXmsrfZ4TuhWn1olL1olJXmsqBi7X1olJXmspZmslbmMhbmsdemsVfl8ZgmsNim8Jpk8F0m7R4m7F5nLB6jbh7jbiDirOEibOGnKaMhq+PnaCVg6qWg6qegKaff6WhnpKofKGtnomxeZy3noG6dZi+n3vCcpPDcpPGn3bLb4/Mb47UbIrVa4rYoGjdaIbeaIXhoWHmZYHobXvpcHjqdHXreHLroVrsfG/uhGnuh2bwj2Hxk17yl1vzmljzm1j0nlX1olL3AJXWAAAAbXRSTlMAEBAQHx8gICAuLjAwMDw9PUBAQEpQUFBXV1hgYGBkcHBwcXl8gICAgoiIkJCQlJicnJ2goKCmqK+wsLC4usDAwMjP0NDQ1NbW3Nzg4ODi5+3v8PDw8/T09PX29vb39/f5+fr7+/z8/Pz9/v7+zczCxgAABC5JREFUeAHN1ul3k0UUBvCb1CTVpmpaitAGSLSpSuKCLWpbTKNJFGlcSMAFF63iUmRccNG6gLbuxkXU66JAUef/9LSpmXnyLr3T5AO/rzl5zj137p136BISy44fKJXuGN/d19PUfYeO67Znqtf2KH33Id1psXoFdW30sPZ1sMvs2D060AHqws4FHeJojLZqnw53cmfvg+XR8mC0OEjuxrXEkX5ydeVJLVIlV0e10PXk5k7dYeHu7Cj1j+49uKg7uLU61tGLw1lq27ugQYlclHC4bgv7VQ+TAyj5Zc/UjsPvs1sd5cWryWObtvWT2EPa4rtnWW3JkpjggEpbOsPr7F7EyNewtpBIslA7p43HCsnwooXTEc3UmPmCNn5lrqTJxy6nRmcavGZVt/3Da2pD5NHvsOHJCrdc1G2r3DITpU7yic7w/7Rxnjc0kt5GC4djiv2Sz3Fb2iEZg41/ddsFDoyuYrIkmFehz0HR2thPgQqMyQYb2OtB0WxsZ3BeG3+wpRb1vzl2UYBog8FfGhttFKjtAclnZYrRo9ryG9uG/FZQU4AEg8ZE9LjGMzTmqKXPLnlWVnIlQQTvxJf8ip7VgjZjyVPrjw1te5otM7RmP7xm+sK2Gv9I8Gi++BRbEkR9EBw8zRUcKxwp73xkaLiqQb+kGduJTNHG72zcW9LoJgqQxpP3/Tj//c3yB0tqzaml05/+orHLksVO+95kX7/7qgJvnjlrfr2Ggsyx0eoy9uPzN5SPd86aXggOsEKW2Prz7du3VID3/tzs/sSRs2w7ovVHKtjrX2pd7ZMlTxAYfBAL9jiDwfLkq55Tm7ifhMlTGPyCAs7RFRhn47JnlcB9RM5T97ASuZXIcVNuUDIndpDbdsfrqsOppeXl5Y+XVKdjFCTh+zGaVuj0d9zy05PPK3QzBamxdwtTCrzyg/2Rvf2EstUjordGwa/kx9mSJLr8mLLtCW8HHGJc2R5hS219IiF
6PnTusOqcMl57gm0Z8kanKMAQg0qSyuZfn7zItsbGyO9QlnxY0eCuD1XL2ys/MsrQhltE7Ug0uFOzufJFE2PxBo/YAx8XPPdDwWN0MrDRYIZF0mSMKCNHgaIVFoBbNoLJ7tEQDKxGF0kcLQimojCZopv0OkNOyWCCg9XMVAi7ARJzQdM2QUh0gmBozjc3Skg6dSBRqDGYSUOu66Zg+I2fNZs/M3/f/Grl/XnyF1Gw3VKCez0PN5IUfFLqvgUN4C0qNqYs5YhPL+aVZYDE4IpUk57oSFnJm4FyCqqOE0jhY2SMyLFoo56zyo6becOS5UVDdj7Vih0zp+tcMhwRpBeLyqtIjlJKAIZSbI8SGSF3k0pA3mR5tHuwPFoa7N7reoq2bqCsAk1HqCu5uvI1n6JuRXI+S1Mco54YmYTwcn6Aeic+kssXi8XpXC4V3t7/ADuTNKaQJdScAAAAAElFTkSuQmCC)](https://mybinder.org/v2/gh/upgini/upgini/main?urlpath=notebooks%2Fnotebooks%2Fkaggle_example.ipynb)
173
174
  &nbsp;
174
175
  [![Open example in Gitpod](https://img.shields.io/badge/run_example_in-gitpod-orange?style=for-the-badge&logo=gitpod)](https://gitpod.io/#/github.com/upgini/upgini)
@@ -122,11 +122,12 @@ Run [Feature search & generation notebook](https://github.com/upgini/upgini/blob
122
122
 
123
123
  * The goal is to **predict future sales of different goods in stores** based on a 5-year history of sales.
124
124
  * The Kaggle competition [Store Item Demand Forecasting Challenge](https://www.kaggle.com/c/demand-forecasting-kernels-only) is a product sales forecasting task. The evaluation metric is [SMAPE](https://en.wikipedia.org/wiki/Symmetric_mean_absolute_percentage_error).
125
- <!--
125
+
126
126
  Run [Simple sales prediction for retail stores](https://github.com/upgini/upgini/blob/main/notebooks/kaggle_example.ipynb) inside your browser:
127
127
 
128
128
  [![Open example in Google Colab](https://img.shields.io/badge/run_example_in-colab-blue?style=for-the-badge&logo=googlecolab)](https://colab.research.google.com/github/upgini/upgini/blob/main/notebooks/kaggle_example.ipynb)
129
129
  &nbsp;
130
+ <!--
130
131
  [![Open in Binder](https://img.shields.io/badge/run_example_in-mybinder-red.svg?style=for-the-badge&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFkAAABZCAMAAABi1XidAAAB8lBMVEX///9XmsrmZYH1olJXmsr1olJXmsrmZYH1olJXmsr1olJXmsrmZYH1olL1olJXmsr1olJXmsrmZYH1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olJXmsrmZYH1olL1olL0nFf1olJXmsrmZYH1olJXmsq8dZb1olJXmsrmZYH1olJXmspXmspXmsr1olL1olJXmsrmZYH1olJXmsr1olL1olJXmsrmZYH1olL1olLeaIVXmsrmZYH1olL1olL1olJXmsrmZYH1olLna31Xmsr1olJXmsr1olJXmsrmZYH1olLqoVr1olJXmsr1olJXmsrmZYH1olL1olKkfaPobXvviGabgadXmsqThKuofKHmZ4Dobnr1olJXmsr1olJXmspXmsr1olJXmsrfZ4TuhWn1olL1olJXmsqBi7X1olJXmspZmslbmMhbmsdemsVfl8ZgmsNim8Jpk8F0m7R4m7F5nLB6jbh7jbiDirOEibOGnKaMhq+PnaCVg6qWg6qegKaff6WhnpKofKGtnomxeZy3noG6dZi+n3vCcpPDcpPGn3bLb4/Mb47UbIrVa4rYoGjdaIbeaIXhoWHmZYHobXvpcHjqdHXreHLroVrsfG/uhGnuh2bwj2Hxk17yl1vzmljzm1j0nlX1olL3AJXWAAAAbXRSTlMAEBAQHx8gICAuLjAwMDw9PUBAQEpQUFBXV1hgYGBkcHBwcXl8gICAgoiIkJCQlJicnJ2goKCmqK+wsLC4usDAwMjP0NDQ1NbW3Nzg4ODi5+3v8PDw8/T09PX29vb39/f5+fr7+/z8/Pz9/v7+zczCxgAABC5JREFUeAHN1ul3k0UUBvCb1CTVpmpaitAGSLSpSuKCLWpbTKNJFGlcSMAFF63iUmRccNG6gLbuxkXU66JAUef/9LSpmXnyLr3T5AO/rzl5zj137p136BISy44fKJXuGN/d19PUfYeO67Znqtf2KH33Id1psXoFdW30sPZ1sMvs2D060AHqws4FHeJojLZqnw53cmfvg+XR8mC0OEjuxrXEkX5ydeVJLVIlV0e10PXk5k7dYeHu7Cj1j+49uKg7uLU61tGLw1lq27ugQYlclHC4bgv7VQ+TAyj5Zc/UjsPvs1sd5cWryWObtvWT2EPa4rtnWW3JkpjggEpbOsPr7F7EyNewtpBIslA7p43HCsnwooXTEc3UmPmCNn5lrqTJxy6nRmcavGZVt/3Da2pD5NHvsOHJCrdc1G2r3DITpU7yic7w/7Rxnjc0kt5GC4djiv2Sz3Fb2iEZg41/ddsFDoyuYrIkmFehz0HR2thPgQqMyQYb2OtB0WxsZ3BeG3+wpRb1vzl2UYBog8FfGhttFKjtAclnZYrRo9ryG9uG/FZQU4AEg8ZE9LjGMzTmqKXPLnlWVnIlQQTvxJf8ip7VgjZjyVPrjw1te5otM7RmP7xm+sK2Gv9I8Gi++BRbEkR9EBw8zRUcKxwp73xkaLiqQb+kGduJTNHG72zcW9LoJgqQxpP3/Tj//c3yB0tqzaml05/+orHLksVO+95kX7/7qgJvnjlrfr2Ggsyx0eoy9uPzN5SPd86aXggOsEKW2Prz7du3VID3/tzs/sSRs2w7ovVHKtjrX2pd7ZMlTxAYfBAL9jiDwfLkq55Tm7ifhMlTGPyCAs7RFRhn47JnlcB9RM5T97ASuZXIcVNuUDIndpDbdsfrqsOppeXl5Y+XVKdjFCTh+zGaVuj0d9zy05PPK3QzBamxdwtTCrzyg/2Rvf2EstUjordGwa/kx9mSJLr8mLLtCW8HHGJc2R5hS219IiF
6PnTusOqcMl57gm0Z8kanKMAQg0qSyuZfn7zItsbGyO9QlnxY0eCuD1XL2ys/MsrQhltE7Ug0uFOzufJFE2PxBo/YAx8XPPdDwWN0MrDRYIZF0mSMKCNHgaIVFoBbNoLJ7tEQDKxGF0kcLQimojCZopv0OkNOyWCCg9XMVAi7ARJzQdM2QUh0gmBozjc3Skg6dSBRqDGYSUOu66Zg+I2fNZs/M3/f/Grl/XnyF1Gw3VKCez0PN5IUfFLqvgUN4C0qNqYs5YhPL+aVZYDE4IpUk57oSFnJm4FyCqqOE0jhY2SMyLFoo56zyo6becOS5UVDdj7Vih0zp+tcMhwRpBeLyqtIjlJKAIZSbI8SGSF3k0pA3mR5tHuwPFoa7N7reoq2bqCsAk1HqCu5uvI1n6JuRXI+S1Mco54YmYTwcn6Aeic+kssXi8XpXC4V3t7/ADuTNKaQJdScAAAAAElFTkSuQmCC)](https://mybinder.org/v2/gh/upgini/upgini/main?urlpath=notebooks%2Fnotebooks%2Fkaggle_example.ipynb)
131
132
  &nbsp;
132
133
  [![Open example in Gitpod](https://img.shields.io/badge/run_example_in-gitpod-orange?style=for-the-badge&logo=gitpod)](https://gitpod.io/#/github.com/upgini/upgini)
@@ -0,0 +1 @@
1
+ __version__ = "1.2.42"
@@ -165,6 +165,10 @@ class FeaturesEnricher(TransformerMixin):
165
165
 
166
166
  shared_datasets: list of str, optional (default=None)
167
167
  List of private shared dataset ids for custom search
168
+
169
+ select_features: bool, optional (default=False)
170
+ If True, return only selected features both from input and data sources.
171
+ Otherwise, return all features from input and only selected features from data sources.
168
172
  """
169
173
 
170
174
  TARGET_NAME = "target"
@@ -231,6 +235,7 @@ class FeaturesEnricher(TransformerMixin):
231
235
  client_visitorid: Optional[str] = None,
232
236
  custom_bundle_config: Optional[str] = None,
233
237
  add_date_if_missing: bool = True,
238
+ select_features: bool = False,
234
239
  disable_force_downsampling: bool = False,
235
240
  id_columns: Optional[List[str]] = None,
236
241
  **kwargs,
@@ -292,6 +297,7 @@ class FeaturesEnricher(TransformerMixin):
292
297
  self.dropped_client_feature_names_ = []
293
298
  self.feature_importances_ = []
294
299
  self.search_id = search_id
300
+ self.select_features = select_features
295
301
  self.disable_force_downsampling = disable_force_downsampling
296
302
 
297
303
  if search_id:
@@ -399,7 +405,6 @@ class FeaturesEnricher(TransformerMixin):
399
405
  remove_outliers_calc_metrics: Optional[bool] = None,
400
406
  progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
401
407
  search_id_callback: Optional[Callable[[str], Any]] = None,
402
- select_features: bool = False,
403
408
  **kwargs,
404
409
  ):
405
410
  """Fit to data.
@@ -435,10 +440,6 @@ class FeaturesEnricher(TransformerMixin):
435
440
 
436
441
  remove_outliers_calc_metrics, optional (default=True)
437
442
  If True then rows with target outliers will be dropped on metrics calculation
438
-
439
- select_features: bool, optional (default=False)
440
- If True, return only selected features both from input and data sources.
441
- Otherwise, return all features from input and only selected features from data sources.
442
443
  """
443
444
  trace_id = str(uuid.uuid4())
444
445
  start_time = time.time()
@@ -473,7 +474,6 @@ class FeaturesEnricher(TransformerMixin):
473
474
  self.y = y
474
475
  self.eval_set = self._check_eval_set(eval_set, X, self.bundle)
475
476
  self.dump_input(trace_id, X, y, self.eval_set)
476
- self.__set_select_features(select_features)
477
477
  self.__inner_fit(
478
478
  trace_id,
479
479
  X,
@@ -523,10 +523,6 @@ class FeaturesEnricher(TransformerMixin):
523
523
  finally:
524
524
  self.logger.info(f"Fit elapsed time: {time.time() - start_time}")
525
525
 
526
- def __set_select_features(self, select_features: bool):
527
- self.fit_select_features = select_features
528
- self.runtime_parameters.properties["select_features"] = select_features
529
-
530
526
  def fit_transform(
531
527
  self,
532
528
  X: Union[pd.DataFrame, pd.Series, np.ndarray],
@@ -542,7 +538,6 @@ class FeaturesEnricher(TransformerMixin):
542
538
  estimator: Optional[Any] = None,
543
539
  remove_outliers_calc_metrics: Optional[bool] = None,
544
540
  progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
545
- select_features: bool = False,
546
541
  **kwargs,
547
542
  ) -> pd.DataFrame:
548
543
  """Fit to data, then transform it.
@@ -583,10 +578,6 @@ class FeaturesEnricher(TransformerMixin):
583
578
  remove_outliers_calc_metrics, optional (default=True)
584
579
  If True then rows with target outliers will be dropped on metrics calculation
585
580
 
586
- select_features: bool, optional (default=False)
587
- If True, return only selected features both from input and data sources.
588
- Otherwise, return all features from input and only selected features from data sources.
589
-
590
581
  Returns
591
582
  -------
592
583
  X_new: pandas.DataFrame of shape (n_samples, n_features_new)
@@ -621,7 +612,6 @@ class FeaturesEnricher(TransformerMixin):
621
612
  self.X = X
622
613
  self.y = y
623
614
  self.eval_set = self._check_eval_set(eval_set, X, self.bundle)
624
- self.__set_select_features(select_features)
625
615
  self.dump_input(trace_id, X, y, self.eval_set)
626
616
 
627
617
  if _num_samples(drop_duplicates(X)) > Dataset.MAX_ROWS:
@@ -1241,11 +1231,8 @@ class FeaturesEnricher(TransformerMixin):
1241
1231
  self.logger.info(f"Calculating metrics elapsed time: {time.time() - start_time}")
1242
1232
 
1243
1233
  def _update_shap_values(self, trace_id: str, x_columns: List[str], new_shaps: Dict[str, float]):
1244
- renaming = self.fit_columns_renaming or {}
1245
1234
  new_shaps = {
1246
- renaming.get(feature, feature): _round_shap_value(shap)
1247
- for feature, shap in new_shaps.items()
1248
- if feature in self.feature_names_ or renaming.get(feature, feature) in self.feature_names_
1235
+ feature: _round_shap_value(shap) for feature, shap in new_shaps.items() if feature in self.feature_names_
1249
1236
  }
1250
1237
  self.__prepare_feature_importances(trace_id, x_columns, new_shaps, silent=True)
1251
1238
 
@@ -1474,7 +1461,7 @@ class FeaturesEnricher(TransformerMixin):
1474
1461
  c
1475
1462
  for c in X_sampled.columns.to_list()
1476
1463
  if (
1477
- not self.fit_select_features
1464
+ not self.select_features
1478
1465
  or c in self.feature_names_
1479
1466
  or (self.fit_columns_renaming is not None and self.fit_columns_renaming.get(c) in self.feature_names_)
1480
1467
  )
@@ -2049,14 +2036,13 @@ class FeaturesEnricher(TransformerMixin):
2049
2036
  file_metadata = self._search_task.get_file_metadata(str(uuid.uuid4()))
2050
2037
  search_keys = file_metadata.search_types()
2051
2038
  if SearchKey.IPV6_ADDRESS in search_keys:
2052
- # search_keys.remove(SearchKey.IPV6_ADDRESS)
2053
2039
  search_keys.pop(SearchKey.IPV6_ADDRESS, None)
2054
-
2040
+ original_names = {c.name: c.originalName for c in file_metadata.columns}
2055
2041
  keys = (
2056
2042
  "{"
2057
2043
  + ", ".join(
2058
2044
  [
2059
- f'"{key.name}": {{"name": "{name}", "value": "{key_example(key)}"}}'
2045
+ f'"{key.name}": {{"name": "{original_names.get(name, name)}", "value": "{key_example(key)}"}}'
2060
2046
  for key, name in search_keys.items()
2061
2047
  ]
2062
2048
  )
@@ -2076,10 +2062,27 @@ class FeaturesEnricher(TransformerMixin):
2076
2062
  features_section = ""
2077
2063
 
2078
2064
  search_id = self._search_task.search_task_id
2079
- api_example = f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
2065
+ api_example = f"""
2066
+ {Format.BOLD}Shell{Format.END}:
2067
+
2068
+ curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
2080
2069
  -H 'Authorization: {self.api_key}' \\
2081
2070
  -H 'Content-Type: application/json' \\
2082
- -d '{{"search_keys": {keys}{features_section}, "only_online_sources": {str(only_online_sources).lower()}}}'"""
2071
+ -d '{{"search_keys": {keys}{features_section}, "only_online_sources": {str(only_online_sources).lower()}}}'
2072
+
2073
+ {Format.BOLD}Python{Format.END}:
2074
+
2075
+ import requests
2076
+
2077
+ response = requests.post(
2078
+ url='https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}',
2079
+ headers={{'Authorization': '{self.api_key}'}},
2080
+ json={{"search_keys": {keys}{features_section}, "only_online_sources": {only_online_sources}}}
2081
+ )
2082
+ if response.status_code == 200:
2083
+ print(response.json())
2084
+ """
2085
+
2083
2086
  return api_example
2084
2087
 
2085
2088
  def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
@@ -2524,9 +2527,11 @@ class FeaturesEnricher(TransformerMixin):
2524
2527
  def __is_registered(self) -> bool:
2525
2528
  return self.api_key is not None and self.api_key != ""
2526
2529
 
2527
- def __log_warning(self, message: str, show_support_link: bool = False):
2530
+ def __log_warning(self, message: str, show_support_link: bool = False, is_red=False):
2528
2531
  warning_num = self.warning_counter.increment()
2529
2532
  formatted_message = f"WARNING #{warning_num}: {message}\n"
2533
+ if is_red:
2534
+ formatted_message = Format.RED + formatted_message + Format.END
2530
2535
  if show_support_link:
2531
2536
  self.__display_support_link(formatted_message)
2532
2537
  else:
@@ -3300,8 +3305,8 @@ class FeaturesEnricher(TransformerMixin):
3300
3305
  f"Client ip: {self.client_ip}\n"
3301
3306
  f"Client visitorId: {self.client_visitorid}\n"
3302
3307
  f"Add date if missing: {self.add_date_if_missing}\n"
3308
+ f"Select features: {self.select_features}\n"
3303
3309
  f"Disable force downsampling: {self.disable_force_downsampling}\n"
3304
- f"Id columns: {self.id_columns}\n"
3305
3310
  )
3306
3311
 
3307
3312
  def sample(df):
@@ -3688,7 +3693,7 @@ class FeaturesEnricher(TransformerMixin):
3688
3693
  is_client_feature = feature_meta.name in x_columns
3689
3694
 
3690
3695
  if feature_meta.shap_value == 0.0:
3691
- if self.fit_select_features:
3696
+ if self.select_features:
3692
3697
  self.dropped_client_feature_names_.append(feature_meta.name)
3693
3698
  continue
3694
3699
 
@@ -3697,7 +3702,7 @@ class FeaturesEnricher(TransformerMixin):
3697
3702
  feature_meta.name in self.fit_generated_features
3698
3703
  or feature_meta.name == COUNTRY
3699
3704
  # In select_features mode we select also from etalon features and need to show them
3700
- or (not self.fit_select_features and is_client_feature)
3705
+ or (not self.select_features and is_client_feature)
3701
3706
  ):
3702
3707
  continue
3703
3708
 
@@ -216,7 +216,7 @@ imbalanced_target=\nTarget is imbalanced and will be undersampled. Frequency of
216
216
  loss_selection_info=Using loss `{}` for feature selection
217
217
  loss_calc_metrics_info=Using loss `{}` for metrics calculation with default estimator
218
218
  forced_balance_undersample=For quick data retrieval, your dataset has been sampled. To use data search without data sampling please contact support (sales@upgini.com)
219
- online_api_features_transform=Please note that some of the selected features {} are provided through a slow enrichment interface and are not available via transformation. However, they can be accessed via the API:
219
+ online_api_features_transform=Please note that some of the selected features {} are provided through a slow enrichment interface and are not available via transformation. \nHowever, they can be accessed via the API:
220
220
 
221
221
  # Validation table
222
222
  validation_column_name_header=Column name
@@ -110,16 +110,16 @@ class IpSearchKeyConverter:
110
110
  .astype("string")
111
111
  # .str.replace(".0", "", regex=False)
112
112
  )
113
- ip_binary = self.ip_column + "_binary"
114
- df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
113
+ # ip_binary = self.ip_column + "_binary"
114
+ # df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
115
115
 
116
116
  df = df.drop(columns=self.ip_column)
117
117
  del self.search_keys[self.ip_column]
118
118
  del self.columns_renaming[self.ip_column]
119
119
  self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
120
- self.search_keys[ip_binary] = SearchKey.IP_BINARY
121
- self.columns_renaming[ipv6] = original_ip # could be __unnest_ip...
122
- self.columns_renaming[ip_binary] = original_ip
120
+ # self.search_keys[ip_binary] = SearchKey.IP_BINARY
121
+ self.columns_renaming[ipv6] = original_ip
122
+ # self.columns_renaming[ip_binary] = original_ip
123
123
 
124
124
  return df
125
125
 
@@ -39,7 +39,7 @@ def validate_version(logger: logging.Logger, warning_function: Optional[Callable
39
39
  if current_version < latest_version:
40
40
  msg = bundle.get("version_warning").format(current_version, latest_version)
41
41
  if warning_function:
42
- warning_function(msg)
42
+ warning_function(msg, is_red=True)
43
43
  else:
44
44
  logger.warning(msg)
45
45
  print(msg)
@@ -1 +0,0 @@
1
- __version__ = "1.2.41a3758.dev1"
File without changes
File without changes