upgini 1.2.114a4__tar.gz → 1.2.114a6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {upgini-1.2.114a4 → upgini-1.2.114a6}/PKG-INFO +32 -16
  2. {upgini-1.2.114a4 → upgini-1.2.114a6}/README.md +30 -14
  3. {upgini-1.2.114a4 → upgini-1.2.114a6}/pyproject.toml +1 -1
  4. upgini-1.2.114a6/src/upgini/__about__.py +1 -0
  5. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/dataset.py +8 -72
  6. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/features_enricher.py +276 -461
  7. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/http.py +11 -0
  8. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/metadata.py +0 -10
  9. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/resource_bundle/strings.properties +1 -4
  10. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/search_task.py +6 -0
  11. upgini-1.2.114a6/src/upgini/utils/config.py +43 -0
  12. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/display_utils.py +1 -1
  13. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/hash_utils.py +23 -1
  14. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/psi.py +6 -3
  15. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/sample_utils.py +16 -41
  16. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/target_utils.py +48 -2
  17. upgini-1.2.114a4/src/upgini/__about__.py +0 -1
  18. {upgini-1.2.114a4 → upgini-1.2.114a6}/.gitignore +0 -0
  19. {upgini-1.2.114a4 → upgini-1.2.114a6}/LICENSE +0 -0
  20. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/__init__.py +0 -0
  21. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/ads.py +0 -0
  22. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/ads_management/__init__.py +0 -0
  23. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/ads_management/ads_manager.py +0 -0
  24. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/__init__.py +0 -0
  25. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/all_operators.py +0 -0
  26. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/binary.py +0 -0
  27. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/date.py +0 -0
  28. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/feature.py +0 -0
  29. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/groupby.py +0 -0
  30. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/operator.py +0 -0
  31. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/__init__.py +0 -0
  32. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/base.py +0 -0
  33. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/cross.py +0 -0
  34. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/delta.py +0 -0
  35. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/lag.py +0 -0
  36. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/roll.py +0 -0
  37. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/trend.py +0 -0
  38. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/timeseries/volatility.py +0 -0
  39. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/unary.py +0 -0
  40. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/utils.py +0 -0
  41. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/autofe/vector.py +0 -0
  42. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/data_source/__init__.py +0 -0
  43. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/data_source/data_source_publisher.py +0 -0
  44. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/errors.py +0 -0
  45. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/mdc/__init__.py +0 -0
  46. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/mdc/context.py +0 -0
  47. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/metrics.py +0 -0
  48. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/normalizer/__init__.py +0 -0
  49. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/normalizer/normalize_utils.py +0 -0
  50. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/resource_bundle/__init__.py +0 -0
  51. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/resource_bundle/exceptions.py +0 -0
  52. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  53. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/sampler/__init__.py +0 -0
  54. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/sampler/base.py +0 -0
  55. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/sampler/random_under_sampler.py +0 -0
  56. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/sampler/utils.py +0 -0
  57. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/spinner.py +0 -0
  58. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  59. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/__init__.py +0 -0
  60. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/base_search_key_detector.py +0 -0
  61. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/blocked_time_series.py +0 -0
  62. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/country_utils.py +0 -0
  63. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/custom_loss_utils.py +0 -0
  64. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/cv_utils.py +0 -0
  65. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/datetime_utils.py +0 -0
  66. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/deduplicate_utils.py +0 -0
  67. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/email_utils.py +0 -0
  68. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/fallback_progress_bar.py +0 -0
  69. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/feature_info.py +0 -0
  70. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/features_validator.py +0 -0
  71. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/format.py +0 -0
  72. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/ip_utils.py +0 -0
  73. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/mstats.py +0 -0
  74. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/phone_utils.py +0 -0
  75. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/postal_code_utils.py +0 -0
  76. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/progress_bar.py +0 -0
  77. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/sklearn_ext.py +0 -0
  78. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/sort.py +0 -0
  79. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.114a4 → upgini-1.2.114a6}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.114a4
3
+ Version: 1.2.114a6
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -38,7 +38,7 @@ Requires-Dist: python-bidi==0.4.2
38
38
  Requires-Dist: python-dateutil>=2.8.0
39
39
  Requires-Dist: python-json-logger>=3.3.0
40
40
  Requires-Dist: requests>=2.8.0
41
- Requires-Dist: scikit-learn>=1.3.0
41
+ Requires-Dist: scikit-learn<1.8.0,>=1.3.0
42
42
  Requires-Dist: scipy>=1.10.0
43
43
  Requires-Dist: shap>=0.44.0
44
44
  Requires-Dist: xhtml2pdf<0.3.0,>=0.2.11
@@ -749,6 +749,36 @@ enricher.fit(
749
749
  )
750
750
  ```
751
751
 
752
+ ### Control feature stability with PSI parameters
753
+
754
+ `FeaturesEnricher` supports Population Stability Index (PSI) calculation on eval_set to evaluate feature stability over time. You can control this behavior using stability parameters in `fit` and `fit_transform` methods:
755
+
756
+ ```python
757
+ enricher = FeaturesEnricher(
758
+ search_keys={"registration_date": SearchKey.DATE}
759
+ )
760
+
761
+ # Control feature stability during fit
762
+ enricher.fit(
763
+ X, y,
764
+ stability_threshold=0.2, # PSI threshold: features with PSI above this value will be dropped
765
+ stability_agg_func="max" # Aggregation function for stability values: "max", "min", "mean"
766
+ )
767
+
768
+ # Same parameters work for fit_transform
769
+ enriched_df = enricher.fit_transform(
770
+ X, y,
771
+ stability_threshold=0.1, # Stricter threshold for more stable features
772
+ stability_agg_func="mean" # Use mean aggregation instead of max
773
+ )
774
+ ```
775
+
776
+ **Stability parameters:**
777
+ - `stability_threshold` (float, default=0.2): PSI threshold value. Features with PSI above this threshold will be excluded from the final feature set. Lower values mean stricter stability requirements.
778
+ - `stability_agg_func` (str, default="max"): Function to aggregate PSI values across time intervals. Options: "max" (most conservative), "min" (least conservative), "mean" (balanced approach).
779
+
780
+ **PSI (Population Stability Index)** measures how much feature distribution changes over time. Lower PSI values indicate more stable features, which are generally more reliable for production ML models.
781
+
752
782
  ### Use custom loss function in feature selection & metrics calculation
753
783
 
754
784
  `FeaturesEnricher` can be initialized with additional string parameter `loss`.
@@ -769,20 +799,6 @@ enricher = FeaturesEnricher(
769
799
  enriched_dataframe.fit(X, y)
770
800
  ```
771
801
 
772
- ### Return initial dataframe enriched with TOP external features by importance
773
-
774
- `transform` and `fit_transform` methods of `FeaturesEnricher` can be used with two additional parameters:
775
- - `importance_threshold`: float = 0 - only features with *importance >= threshold* will be added to the output dataframe
776
- - `max_features`: int - only first TOP N features by importance will be returned, where *N = max_features*
777
-
778
- And `keep_input=True` will keep all initial columns from search dataset X:
779
- ```python
780
- enricher = FeaturesEnricher(
781
- search_keys={"subscription_activation_date": SearchKey.DATE}
782
- )
783
- enriched_dataframe.fit_transform(X, y, keep_input=True, max_features=2)
784
- ```
785
-
786
802
  ### Exclude premium data sources from fit, transform and metrics calculation
787
803
 
788
804
  `fit`, `fit_transform`, `transform` and `calculate_metrics` methods of `FeaturesEnricher` can be used with parameter `exclude_features_sources` that allows you to exclude Trial or Paid features from Premium data sources:
@@ -703,6 +703,36 @@ enricher.fit(
703
703
  )
704
704
  ```
705
705
 
706
+ ### Control feature stability with PSI parameters
707
+
708
+ `FeaturesEnricher` supports Population Stability Index (PSI) calculation on eval_set to evaluate feature stability over time. You can control this behavior using stability parameters in `fit` and `fit_transform` methods:
709
+
710
+ ```python
711
+ enricher = FeaturesEnricher(
712
+ search_keys={"registration_date": SearchKey.DATE}
713
+ )
714
+
715
+ # Control feature stability during fit
716
+ enricher.fit(
717
+ X, y,
718
+ stability_threshold=0.2, # PSI threshold: features with PSI above this value will be dropped
719
+ stability_agg_func="max" # Aggregation function for stability values: "max", "min", "mean"
720
+ )
721
+
722
+ # Same parameters work for fit_transform
723
+ enriched_df = enricher.fit_transform(
724
+ X, y,
725
+ stability_threshold=0.1, # Stricter threshold for more stable features
726
+ stability_agg_func="mean" # Use mean aggregation instead of max
727
+ )
728
+ ```
729
+
730
+ **Stability parameters:**
731
+ - `stability_threshold` (float, default=0.2): PSI threshold value. Features with PSI above this threshold will be excluded from the final feature set. Lower values mean stricter stability requirements.
732
+ - `stability_agg_func` (str, default="max"): Function to aggregate PSI values across time intervals. Options: "max" (most conservative), "min" (least conservative), "mean" (balanced approach).
733
+
734
+ **PSI (Population Stability Index)** measures how much feature distribution changes over time. Lower PSI values indicate more stable features, which are generally more reliable for production ML models.
735
+
706
736
  ### Use custom loss function in feature selection & metrics calculation
707
737
 
708
738
  `FeaturesEnricher` can be initialized with additional string parameter `loss`.
@@ -723,20 +753,6 @@ enricher = FeaturesEnricher(
723
753
  enriched_dataframe.fit(X, y)
724
754
  ```
725
755
 
726
- ### Return initial dataframe enriched with TOP external features by importance
727
-
728
- `transform` and `fit_transform` methods of `FeaturesEnricher` can be used with two additional parameters:
729
- - `importance_threshold`: float = 0 - only features with *importance >= threshold* will be added to the output dataframe
730
- - `max_features`: int - only first TOP N features by importance will be returned, where *N = max_features*
731
-
732
- And `keep_input=True` will keep all initial columns from search dataset X:
733
- ```python
734
- enricher = FeaturesEnricher(
735
- search_keys={"subscription_activation_date": SearchKey.DATE}
736
- )
737
- enriched_dataframe.fit_transform(X, y, keep_input=True, max_features=2)
738
- ```
739
-
740
756
  ### Exclude premium data sources from fit, transform and metrics calculation
741
757
 
742
758
  `fit`, `fit_transform`, `transform` and `calculate_metrics` methods of `FeaturesEnricher` can be used with parameter `exclude_features_sources` that allows you to exclude Trial or Paid features from Premium data sources:
@@ -46,7 +46,7 @@ dependencies = [
46
46
  "python-dateutil>=2.8.0",
47
47
  "python-json-logger>=3.3.0",
48
48
  "requests>=2.8.0",
49
- "scikit-learn>=1.3.0",
49
+ "scikit-learn>=1.3.0,<1.8.0",
50
50
  "scipy>=1.10.0",
51
51
  "python-bidi==0.4.2",
52
52
  "xhtml2pdf>=0.2.11,<0.3.0",
@@ -0,0 +1 @@
1
+ __version__ = "1.2.114"
@@ -25,7 +25,6 @@ from upgini.metadata import (
25
25
  AutoFEParameters,
26
26
  CVType,
27
27
  DataType,
28
- FeaturesFilter,
29
28
  FileColumnMeaningType,
30
29
  FileColumnMetadata,
31
30
  FileMetadata,
@@ -37,8 +36,9 @@ from upgini.metadata import (
37
36
  )
38
37
  from upgini.resource_bundle import ResourceBundle, get_custom_bundle
39
38
  from upgini.search_task import SearchTask
39
+ from upgini.utils.config import SampleConfig
40
40
  from upgini.utils.email_utils import EmailSearchKeyConverter
41
- from upgini.utils.sample_utils import SampleColumns, SampleConfig, sample
41
+ from upgini.utils.sample_utils import SampleColumns, sample
42
42
 
43
43
  try:
44
44
  from upgini.utils.progress_bar import CustomProgressBar as ProgressBar
@@ -51,9 +51,6 @@ except Exception:
51
51
  class Dataset:
52
52
  MIN_ROWS_COUNT = 100
53
53
  MAX_ROWS = 200_000
54
- IMBALANCE_THESHOLD = 0.6
55
- MIN_TARGET_CLASS_ROWS = 100
56
- MAX_MULTICLASS_CLASS_COUNT = 100
57
54
  MIN_SUPPORTED_DATE_TS = 946684800000 # 2000-01-01
58
55
  MAX_FEATURES_COUNT = 3500
59
56
  MAX_UPLOADING_FILE_SIZE = 268435456 # 256 Mb
@@ -73,6 +70,7 @@ class Dataset:
73
70
  cv_type: Optional[CVType] = None,
74
71
  date_column: Optional[str] = None,
75
72
  id_columns: Optional[List[str]] = None,
73
+ is_imbalanced: bool = False,
76
74
  random_state: Optional[int] = None,
77
75
  sample_config: Optional[SampleConfig] = None,
78
76
  rest_client: Optional[_RestClient] = None,
@@ -117,8 +115,9 @@ class Dataset:
117
115
  self.rest_client = rest_client
118
116
  self.random_state = random_state
119
117
  self.columns_renaming: Dict[str, str] = {}
120
- self.imbalanced: bool = False
118
+ self.is_imbalanced: bool = False
121
119
  self.id_columns = id_columns
120
+ self.is_imbalanced = is_imbalanced
122
121
  self.date_column = date_column
123
122
  if logger is not None:
124
123
  self.logger = logger
@@ -239,8 +238,6 @@ class Dataset:
239
238
  else:
240
239
  train_segment = self.data
241
240
 
242
- self.imbalanced = self.__is_imbalanced(train_segment)
243
-
244
241
  sample_columns = SampleColumns(
245
242
  ids=self.id_columns,
246
243
  date=self.date_column,
@@ -249,55 +246,19 @@ class Dataset:
249
246
  )
250
247
 
251
248
  self.data = sample(
252
- train_segment if self.imbalanced else self.data, # for imbalanced data we will be doing transform anyway
249
+ train_segment if self.is_imbalanced else self.data, # for imbalanced data we will be doing transform anyway
253
250
  self.task_type,
254
251
  self.cv_type,
255
252
  self.sample_config,
256
253
  sample_columns,
257
254
  self.random_state,
258
- balance=self.imbalanced,
255
+ balance=self.is_imbalanced,
259
256
  force_downsampling=force_downsampling,
260
257
  logger=self.logger,
261
258
  bundle=self.bundle,
262
259
  warning_callback=self.warning_callback,
263
260
  )
264
261
 
265
- def __is_imbalanced(self, data: pd.DataFrame) -> bool:
266
- if self.task_type is None or not self.task_type.is_classification():
267
- return False
268
-
269
- if self.task_type == ModelTaskType.BINARY and len(data) <= self.sample_config.binary_min_sample_threshold:
270
- return False
271
-
272
- count = len(data)
273
- target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value, TARGET)
274
- target = data[target_column]
275
- target_classes_count = target.nunique()
276
-
277
- if target_classes_count > self.MAX_MULTICLASS_CLASS_COUNT:
278
- msg = self.bundle.get("dataset_to_many_multiclass_targets").format(
279
- target_classes_count, self.MAX_MULTICLASS_CLASS_COUNT
280
- )
281
- self.logger.warning(msg)
282
- raise ValidationError(msg)
283
-
284
- vc = target.value_counts()
285
- min_class_value = vc.index[len(vc) - 1]
286
- min_class_count = vc[min_class_value]
287
-
288
- if min_class_count < self.MIN_TARGET_CLASS_ROWS:
289
- msg = self.bundle.get("dataset_rarest_class_less_min").format(
290
- min_class_value, min_class_count, self.MIN_TARGET_CLASS_ROWS
291
- )
292
- self.logger.warning(msg)
293
- raise ValidationError(msg)
294
-
295
- min_class_percent = self.IMBALANCE_THESHOLD / target_classes_count
296
- min_class_threshold = min_class_percent * count
297
-
298
- # If min class count less than 30% for binary or (60 / classes_count)% for multiclass
299
- return bool(min_class_count < min_class_threshold)
300
-
301
262
  def __validate_dataset(self, validate_target: bool, silent_mode: bool):
302
263
  """Validate DataSet"""
303
264
  # self.logger.info("validating etalon")
@@ -537,9 +498,6 @@ class Dataset:
537
498
  return_scores: bool,
538
499
  extract_features: bool,
539
500
  accurate_model: Optional[bool] = None,
540
- importance_threshold: Optional[float] = None,
541
- max_features: Optional[int] = None,
542
- filter_features: Optional[dict] = None,
543
501
  runtime_parameters: Optional[RuntimeParameters] = None,
544
502
  metrics_calculation: Optional[bool] = False,
545
503
  auto_fe_parameters: Optional[AutoFEParameters] = None,
@@ -548,28 +506,12 @@ class Dataset:
548
506
  search_customization = SearchCustomization(
549
507
  extractFeatures=extract_features,
550
508
  accurateModel=accurate_model,
551
- importanceThreshold=importance_threshold,
552
- maxFeatures=max_features,
553
509
  returnScores=return_scores,
554
510
  runtimeParameters=runtime_parameters,
555
511
  metricsCalculation=metrics_calculation,
556
512
  )
557
- if filter_features:
558
- if [
559
- key
560
- for key in filter_features
561
- if key not in {"min_importance", "max_psi", "max_count", "selected_features"}
562
- ]:
563
- raise ValidationError(self.bundle.get("dataset_invalid_filter"))
564
- feature_filter = FeaturesFilter(
565
- minImportance=filter_features.get("min_importance"),
566
- maxPSI=filter_features.get("max_psi"),
567
- maxCount=filter_features.get("max_count"),
568
- selectedFeatures=filter_features.get("selected_features"),
569
- )
570
- search_customization.featuresFilter = feature_filter
571
513
 
572
- search_customization.runtimeParameters.properties["etalon_imbalanced"] = self.imbalanced
514
+ search_customization.runtimeParameters.properties["etalon_imbalanced"] = self.is_imbalanced
573
515
  if auto_fe_parameters is not None:
574
516
  search_customization.runtimeParameters.properties["feature_generation_params.ts.gap_days"] = (
575
517
  auto_fe_parameters.ts_gap_days
@@ -624,9 +566,6 @@ class Dataset:
624
566
  extract_features: bool = False,
625
567
  accurate_model: bool = False,
626
568
  exclude_features_sources: Optional[List[str]] = None,
627
- importance_threshold: Optional[float] = None, # deprecated
628
- max_features: Optional[int] = None, # deprecated
629
- filter_features: Optional[dict] = None, # deprecated
630
569
  runtime_parameters: Optional[RuntimeParameters] = None,
631
570
  auto_fe_parameters: Optional[AutoFEParameters] = None,
632
571
  force_downsampling: bool = False,
@@ -643,9 +582,6 @@ class Dataset:
643
582
  return_scores=return_scores,
644
583
  extract_features=extract_features,
645
584
  accurate_model=accurate_model,
646
- importance_threshold=importance_threshold,
647
- max_features=max_features,
648
- filter_features=filter_features,
649
585
  runtime_parameters=runtime_parameters,
650
586
  auto_fe_parameters=auto_fe_parameters,
651
587
  )