upgini 1.2.114a5__tar.gz → 1.2.115a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.114a5 → upgini-1.2.115a1}/PKG-INFO +31 -1
- {upgini-1.2.114a5 → upgini-1.2.115a1}/README.md +30 -0
- upgini-1.2.115a1/src/upgini/__about__.py +1 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/features_enricher.py +215 -207
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/http.py +2 -35
- upgini-1.2.114a5/src/upgini/__about__.py +0 -1
- {upgini-1.2.114a5 → upgini-1.2.115a1}/.gitignore +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/LICENSE +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/pyproject.toml +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/ads.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/dataset.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/errors.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/metadata.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/metrics.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/search_task.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/spinner.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/config.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/hash_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/psi.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.114a5 → upgini-1.2.115a1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.114a5
|
3
|
+
Version: 1.2.115a1
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -749,6 +749,36 @@ enricher.fit(
|
|
749
749
|
)
|
750
750
|
```
|
751
751
|
|
752
|
+
### Control feature stability with PSI parameters
|
753
|
+
|
754
|
+
`FeaturesEnricher` supports Population Stability Index (PSI) calculation on eval_set to evaluate feature stability over time. You can control this behavior using stability parameters in `fit` and `fit_transform` methods:
|
755
|
+
|
756
|
+
```python
|
757
|
+
enricher = FeaturesEnricher(
|
758
|
+
search_keys={"registration_date": SearchKey.DATE}
|
759
|
+
)
|
760
|
+
|
761
|
+
# Control feature stability during fit
|
762
|
+
enricher.fit(
|
763
|
+
X, y,
|
764
|
+
stability_threshold=0.2, # PSI threshold: features with PSI above this value will be dropped
|
765
|
+
stability_agg_func="max" # Aggregation function for stability values: "max", "min", "mean"
|
766
|
+
)
|
767
|
+
|
768
|
+
# Same parameters work for fit_transform
|
769
|
+
enriched_df = enricher.fit_transform(
|
770
|
+
X, y,
|
771
|
+
stability_threshold=0.1, # Stricter threshold for more stable features
|
772
|
+
stability_agg_func="mean" # Use mean aggregation instead of max
|
773
|
+
)
|
774
|
+
```
|
775
|
+
|
776
|
+
**Stability parameters:**
|
777
|
+
- `stability_threshold` (float, default=0.2): PSI threshold value. Features with PSI above this threshold will be excluded from the final feature set. Lower values mean stricter stability requirements.
|
778
|
+
- `stability_agg_func` (str, default="max"): Function to aggregate PSI values across time intervals. Options: "max" (most conservative), "min" (least conservative), "mean" (balanced approach).
|
779
|
+
|
780
|
+
**PSI (Population Stability Index)** measures how much feature distribution changes over time. Lower PSI values indicate more stable features, which are generally more reliable for production ML models.
|
781
|
+
|
752
782
|
### Use custom loss function in feature selection & metrics calculation
|
753
783
|
|
754
784
|
`FeaturesEnricher` can be initialized with additional string parameter `loss`.
|
@@ -703,6 +703,36 @@ enricher.fit(
|
|
703
703
|
)
|
704
704
|
```
|
705
705
|
|
706
|
+
### Control feature stability with PSI parameters
|
707
|
+
|
708
|
+
`FeaturesEnricher` supports Population Stability Index (PSI) calculation on eval_set to evaluate feature stability over time. You can control this behavior using stability parameters in `fit` and `fit_transform` methods:
|
709
|
+
|
710
|
+
```python
|
711
|
+
enricher = FeaturesEnricher(
|
712
|
+
search_keys={"registration_date": SearchKey.DATE}
|
713
|
+
)
|
714
|
+
|
715
|
+
# Control feature stability during fit
|
716
|
+
enricher.fit(
|
717
|
+
X, y,
|
718
|
+
stability_threshold=0.2, # PSI threshold: features with PSI above this value will be dropped
|
719
|
+
stability_agg_func="max" # Aggregation function for stability values: "max", "min", "mean"
|
720
|
+
)
|
721
|
+
|
722
|
+
# Same parameters work for fit_transform
|
723
|
+
enriched_df = enricher.fit_transform(
|
724
|
+
X, y,
|
725
|
+
stability_threshold=0.1, # Stricter threshold for more stable features
|
726
|
+
stability_agg_func="mean" # Use mean aggregation instead of max
|
727
|
+
)
|
728
|
+
```
|
729
|
+
|
730
|
+
**Stability parameters:**
|
731
|
+
- `stability_threshold` (float, default=0.2): PSI threshold value. Features with PSI above this threshold will be excluded from the final feature set. Lower values mean stricter stability requirements.
|
732
|
+
- `stability_agg_func` (str, default="max"): Function to aggregate PSI values across time intervals. Options: "max" (most conservative), "min" (least conservative), "mean" (balanced approach).
|
733
|
+
|
734
|
+
**PSI (Population Stability Index)** measures how much feature distribution changes over time. Lower PSI values indicate more stable features, which are generally more reliable for production ML models.
|
735
|
+
|
706
736
|
### Use custom loss function in feature selection & metrics calculation
|
707
737
|
|
708
738
|
`FeaturesEnricher` can be initialized with additional string parameter `loss`.
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "1.2.115a1"
|