upgini 1.2.114a5__tar.gz → 1.2.114a6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {upgini-1.2.114a5 → upgini-1.2.114a6}/PKG-INFO +31 -1
  2. {upgini-1.2.114a5 → upgini-1.2.114a6}/README.md +30 -0
  3. upgini-1.2.114a6/src/upgini/__about__.py +1 -0
  4. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/features_enricher.py +4 -4
  5. upgini-1.2.114a5/src/upgini/__about__.py +0 -1
  6. {upgini-1.2.114a5 → upgini-1.2.114a6}/.gitignore +0 -0
  7. {upgini-1.2.114a5 → upgini-1.2.114a6}/LICENSE +0 -0
  8. {upgini-1.2.114a5 → upgini-1.2.114a6}/pyproject.toml +0 -0
  9. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/__init__.py +0 -0
  10. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/ads.py +0 -0
  11. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/ads_management/__init__.py +0 -0
  12. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/ads_management/ads_manager.py +0 -0
  13. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/__init__.py +0 -0
  14. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/all_operators.py +0 -0
  15. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/binary.py +0 -0
  16. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/date.py +0 -0
  17. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/operator.py +0 -0
  20. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/__init__.py +0 -0
  21. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/base.py +0 -0
  22. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/cross.py +0 -0
  23. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/delta.py +0 -0
  24. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/lag.py +0 -0
  25. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/roll.py +0 -0
  26. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/trend.py +0 -0
  27. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/volatility.py +0 -0
  28. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/unary.py +0 -0
  29. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/utils.py +0 -0
  30. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/autofe/vector.py +0 -0
  31. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/data_source/__init__.py +0 -0
  32. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/data_source/data_source_publisher.py +0 -0
  33. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/dataset.py +0 -0
  34. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/errors.py +0 -0
  35. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/http.py +0 -0
  36. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/mdc/__init__.py +0 -0
  37. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/mdc/context.py +0 -0
  38. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/metadata.py +0 -0
  39. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/metrics.py +0 -0
  40. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/normalizer/__init__.py +0 -0
  41. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/normalizer/normalize_utils.py +0 -0
  42. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/resource_bundle/__init__.py +0 -0
  43. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/resource_bundle/exceptions.py +0 -0
  44. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/resource_bundle/strings.properties +0 -0
  45. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  46. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/sampler/__init__.py +0 -0
  47. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/sampler/base.py +0 -0
  48. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/sampler/random_under_sampler.py +0 -0
  49. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/sampler/utils.py +0 -0
  50. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/search_task.py +0 -0
  51. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/spinner.py +0 -0
  52. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  53. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/__init__.py +0 -0
  54. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/base_search_key_detector.py +0 -0
  55. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/blocked_time_series.py +0 -0
  56. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/config.py +0 -0
  57. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/country_utils.py +0 -0
  58. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/custom_loss_utils.py +0 -0
  59. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/cv_utils.py +0 -0
  60. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/datetime_utils.py +0 -0
  61. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/deduplicate_utils.py +0 -0
  62. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/display_utils.py +0 -0
  63. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/email_utils.py +0 -0
  64. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/fallback_progress_bar.py +0 -0
  65. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/feature_info.py +0 -0
  66. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/features_validator.py +0 -0
  67. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/hash_utils.py +0 -0
  69. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/ip_utils.py +0 -0
  70. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/mstats.py +0 -0
  71. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/phone_utils.py +0 -0
  72. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/postal_code_utils.py +0 -0
  73. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/progress_bar.py +0 -0
  74. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/psi.py +0 -0
  75. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/sample_utils.py +0 -0
  76. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/sklearn_ext.py +0 -0
  77. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/sort.py +0 -0
  78. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/target_utils.py +0 -0
  79. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.114a5 → upgini-1.2.114a6}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.114a5
3
+ Version: 1.2.114a6
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -749,6 +749,36 @@ enricher.fit(
749
749
  )
750
750
  ```
751
751
 
752
+ ### Control feature stability with PSI parameters
753
+
754
+ `FeaturesEnricher` supports Population Stability Index (PSI) calculation on eval_set to evaluate feature stability over time. You can control this behavior using stability parameters in `fit` and `fit_transform` methods:
755
+
756
+ ```python
757
+ enricher = FeaturesEnricher(
758
+ search_keys={"registration_date": SearchKey.DATE}
759
+ )
760
+
761
+ # Control feature stability during fit
762
+ enricher.fit(
763
+ X, y,
764
+ stability_threshold=0.2, # PSI threshold: features with PSI above this value will be dropped
765
+ stability_agg_func="max" # Aggregation function for stability values: "max", "min", "mean"
766
+ )
767
+
768
+ # Same parameters work for fit_transform
769
+ enriched_df = enricher.fit_transform(
770
+ X, y,
771
+ stability_threshold=0.1, # Stricter threshold for more stable features
772
+ stability_agg_func="mean" # Use mean aggregation instead of max
773
+ )
774
+ ```
775
+
776
+ **Stability parameters:**
777
+ - `stability_threshold` (float, default=0.2): PSI threshold value. Features with PSI above this threshold will be excluded from the final feature set. Lower values mean stricter stability requirements.
778
+ - `stability_agg_func` (str, default="max"): Function to aggregate PSI values across time intervals. Options: "max" (most conservative), "min" (least conservative), "mean" (balanced approach).
779
+
780
+ **PSI (Population Stability Index)** measures how much feature distribution changes over time. Lower PSI values indicate more stable features, which are generally more reliable for production ML models.
781
+
752
782
  ### Use custom loss function in feature selection & metrics calculation
753
783
 
754
784
  `FeaturesEnricher` can be initialized with additional string parameter `loss`.
@@ -703,6 +703,36 @@ enricher.fit(
703
703
  )
704
704
  ```
705
705
 
706
+ ### Control feature stability with PSI parameters
707
+
708
+ `FeaturesEnricher` supports Population Stability Index (PSI) calculation on eval_set to evaluate feature stability over time. You can control this behavior using stability parameters in `fit` and `fit_transform` methods:
709
+
710
+ ```python
711
+ enricher = FeaturesEnricher(
712
+ search_keys={"registration_date": SearchKey.DATE}
713
+ )
714
+
715
+ # Control feature stability during fit
716
+ enricher.fit(
717
+ X, y,
718
+ stability_threshold=0.2, # PSI threshold: features with PSI above this value will be dropped
719
+ stability_agg_func="max" # Aggregation function for stability values: "max", "min", "mean"
720
+ )
721
+
722
+ # Same parameters work for fit_transform
723
+ enriched_df = enricher.fit_transform(
724
+ X, y,
725
+ stability_threshold=0.1, # Stricter threshold for more stable features
726
+ stability_agg_func="mean" # Use mean aggregation instead of max
727
+ )
728
+ ```
729
+
730
+ **Stability parameters:**
731
+ - `stability_threshold` (float, default=0.2): PSI threshold value. Features with PSI above this threshold will be excluded from the final feature set. Lower values mean stricter stability requirements.
732
+ - `stability_agg_func` (str, default="max"): Function to aggregate PSI values across time intervals. Options: "max" (most conservative), "min" (least conservative), "mean" (balanced approach).
733
+
734
+ **PSI (Population Stability Index)** measures how much feature distribution changes over time. Lower PSI values indicate more stable features, which are generally more reliable for production ML models.
735
+
706
736
  ### Use custom loss function in feature selection & metrics calculation
707
737
 
708
738
  `FeaturesEnricher` can be initialized with additional string parameter `loss`.
@@ -0,0 +1 @@
1
+ __version__ = "1.2.114"
@@ -443,7 +443,7 @@ class FeaturesEnricher(TransformerMixin):
443
443
  search_id_callback: Optional[Callable[[str], Any]] = None,
444
444
  select_features: bool = True,
445
445
  auto_fe_parameters: Optional[AutoFEParameters] = None,
446
- stability_threshold: float = 0.15,
446
+ stability_threshold: float = 0.2,
447
447
  stability_agg_func: str = "max",
448
448
  **kwargs,
449
449
  ):
@@ -479,7 +479,7 @@ class FeaturesEnricher(TransformerMixin):
479
479
  If True, return only selected features both from input and data sources.
480
480
  Otherwise, return all features from input and only selected features from data sources.
481
481
 
482
- stability_threshold: float, optional (default=0.15)
482
+ stability_threshold: float, optional (default=0.2)
483
483
  Stability threshold for selected features PSI calculation. If PSI is greater than this threshold,
484
484
  then feature will be dropped.
485
485
 
@@ -592,7 +592,7 @@ class FeaturesEnricher(TransformerMixin):
592
592
  progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
593
593
  select_features: bool = True,
594
594
  auto_fe_parameters: Optional[AutoFEParameters] = None,
595
- stability_threshold: float = 0.15,
595
+ stability_threshold: float = 0.2,
596
596
  stability_agg_func: str = "max",
597
597
  **kwargs,
598
598
  ) -> pd.DataFrame:
@@ -629,7 +629,7 @@ class FeaturesEnricher(TransformerMixin):
629
629
  If True, return only selected features both from input and data sources.
630
630
  Otherwise, return all features from input and only selected features from data sources.
631
631
 
632
- stability_threshold: float, optional (default=0.15)
632
+ stability_threshold: float, optional (default=0.2)
633
633
  Stability threshold for selected features PSI calculation. If PSI is greater than this threshold,
634
634
  then feature will be dropped.
635
635
 
@@ -1 +0,0 @@
1
- __version__ = "1.2.114a5"
File without changes
File without changes
File without changes
File without changes