upgini 1.2.70a3832.dev3__tar.gz → 1.2.71a3832.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (79)
  1. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/PKG-INFO +3 -4
  2. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/pyproject.toml +4 -9
  3. upgini-1.2.71a3832.dev3/src/upgini/__about__.py +1 -0
  4. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/date.py +15 -21
  5. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/feature.py +5 -1
  6. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/timeseries/base.py +3 -9
  7. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/timeseries/cross.py +22 -12
  8. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/timeseries/roll.py +2 -7
  9. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/timeseries/trend.py +2 -1
  10. upgini-1.2.71a3832.dev3/src/upgini/autofe/utils.py +83 -0
  11. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/dataset.py +8 -1
  12. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/features_enricher.py +262 -253
  13. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/metadata.py +4 -0
  14. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/resource_bundle/strings.properties +1 -0
  15. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/search_task.py +7 -1
  16. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/mstats.py +1 -1
  17. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/sort.py +1 -1
  18. upgini-1.2.70a3832.dev3/src/upgini/__about__.py +0 -1
  19. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/.gitignore +0 -0
  20. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/LICENSE +0 -0
  21. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/README.md +0 -0
  22. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/__init__.py +0 -0
  23. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/ads.py +0 -0
  24. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/ads_management/__init__.py +0 -0
  25. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/ads_management/ads_manager.py +0 -0
  26. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/__init__.py +0 -0
  27. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/all_operators.py +0 -0
  28. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/binary.py +0 -0
  29. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/groupby.py +0 -0
  30. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/operator.py +0 -0
  31. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/timeseries/__init__.py +0 -0
  32. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/timeseries/delta.py +0 -0
  33. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/timeseries/lag.py +0 -0
  34. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/timeseries/volatility.py +0 -0
  35. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/unary.py +0 -0
  36. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/autofe/vector.py +0 -0
  37. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/data_source/__init__.py +0 -0
  38. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/data_source/data_source_publisher.py +0 -0
  39. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/errors.py +0 -0
  40. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/http.py +0 -0
  41. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/lazy_import.py +0 -0
  42. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/mdc/__init__.py +0 -0
  43. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/mdc/context.py +0 -0
  44. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/metrics.py +0 -0
  45. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/normalizer/__init__.py +0 -0
  46. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/normalizer/normalize_utils.py +0 -0
  47. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/resource_bundle/__init__.py +0 -0
  48. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/resource_bundle/exceptions.py +0 -0
  49. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  50. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/sampler/__init__.py +0 -0
  51. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/sampler/base.py +0 -0
  52. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/sampler/random_under_sampler.py +0 -0
  53. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/sampler/utils.py +0 -0
  54. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/spinner.py +0 -0
  55. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  56. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/__init__.py +0 -0
  57. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/base_search_key_detector.py +0 -0
  58. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/blocked_time_series.py +0 -0
  59. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/country_utils.py +0 -0
  60. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/custom_loss_utils.py +0 -0
  61. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/cv_utils.py +0 -0
  62. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/datetime_utils.py +0 -0
  63. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/deduplicate_utils.py +0 -0
  64. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/display_utils.py +0 -0
  65. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/email_utils.py +0 -0
  66. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/fallback_progress_bar.py +0 -0
  67. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/feature_info.py +0 -0
  68. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/features_validator.py +0 -0
  69. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/format.py +0 -0
  70. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/ip_utils.py +0 -0
  71. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/phone_utils.py +0 -0
  72. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/postal_code_utils.py +0 -0
  73. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/progress_bar.py +0 -0
  74. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/sklearn_ext.py +0 -0
  75. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/target_utils.py +0 -0
  76. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/track_info.py +0 -0
  77. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/ts_utils.py +0 -0
  78. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/utils/warning_counter.py +0 -0
  79. {upgini-1.2.70a3832.dev3 → upgini-1.2.71a3832.dev3}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.70a3832.dev3
3
+ Version: 1.2.71a3832.dev3
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -17,12 +17,11 @@ Classifier: Intended Audience :: Science/Research
17
17
  Classifier: Intended Audience :: Telecommunications Industry
18
18
  Classifier: License :: OSI Approved :: BSD License
19
19
  Classifier: Operating System :: OS Independent
20
- Classifier: Programming Language :: Python :: 3.8
21
- Classifier: Programming Language :: Python :: 3.9
22
20
  Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
23
22
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
23
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
- Requires-Python: <3.12,>=3.8
24
+ Requires-Python: <3.12,>=3.10
26
25
  Requires-Dist: fastparquet>=0.8.1
27
26
  Requires-Dist: ipywidgets>=8.1.0
28
27
  Requires-Dist: jarowinkler>=2.0.0
@@ -7,7 +7,7 @@ name = "upgini"
7
7
  dynamic = ["version"]
8
8
  description = "Intelligent data search & enrichment for Machine Learning"
9
9
  readme = "README.md"
10
- requires-python = ">=3.8,<3.12"
10
+ requires-python = ">=3.10,<3.12"
11
11
  authors = [
12
12
  { name = "Upgini Developers", email = "madewithlove@upgini.com" },
13
13
  ]
@@ -28,9 +28,8 @@ classifiers = [
28
28
  "Intended Audience :: Telecommunications Industry",
29
29
  "License :: OSI Approved :: BSD License",
30
30
  "Operating System :: OS Independent",
31
- "Programming Language :: Python :: 3.8",
32
- "Programming Language :: Python :: 3.9",
33
31
  "Programming Language :: Python :: 3.10",
32
+ "Programming Language :: Python :: 3.11",
34
33
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
35
34
  "Topic :: Scientific/Engineering :: Information Analysis",
36
35
  ]
@@ -89,15 +88,11 @@ lint = "ruff check {args}"
89
88
  test_all = 'pytest -s -vv tests'
90
89
 
91
90
  [[tool.hatch.envs.test.matrix]]
92
- python = ["3.8"]
93
- pandas = ["1.1.0"]
94
-
95
- [[tool.hatch.envs.test.matrix]]
96
- python = ["3.8", "3.9", "3.10", "3.11"]
91
+ python = ["3.10", "3.11"]
97
92
  pandas = ["1.2.0", "1.3.0", "1.4.0", "1.5.0", "2.0.0"]
98
93
 
99
94
  [[tool.hatch.envs.test.matrix]]
100
- python = ["3.9", "3.10", "3.11"]
95
+ python = ["3.10", "3.11"]
101
96
  pandas = ["2.1.0", "2.2.0"]
102
97
 
103
98
  # from versions: 0.1, 0.2, 0.3.0, 0.4.0, 0.4.1, 0.4.2, 0.4.3, 0.5.0, 0.6.0, 0.6.1, 0.7.0, 0.7.1, 0.7.2, 0.7.3, 0.8.0, 0.8.1, 0.9.0, 0.9.1, 0.10.0, 0.10.1, 0.11.0, 0.12.0, 0.13.0, 0.13.1, 0.14.0, 0.14.1, 0.15.0, 0.15.1, 0.15.2, 0.16.0, 0.16.1, 0.16.2, 0.17.0, 0.17.1, 0.18.0, 0.18.1, 0.19.0, 0.19.1, 0.19.2, 0.20.0, 0.20.1, 0.20.2, 0.20.3, 0.21.0, 0.21.1, 0.22.0, 0.23.0, 0.23.1, 0.23.2, 0.23.3, 0.23.4, 0.24.0, 0.24.1, 0.24.2, 0.25.0, 0.25.1, 0.25.2, 0.25.3, 1.0.0, 1.0.1, 1.0.2, 1.0.3, 1.0.4, 1.0.5, 1.1.0, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.0, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.2.5, 1.3.0, 1.3.1, 1.3.2, 1.3.3, 1.3.4, 1.3.5, 1.4.0rc0, 1.4.0, 1.4.1, 1.4.2, 1.4.3, 1.4.4, 1.5.0rc0, 1.5.0, 1.5.1, 1.5.2, 1.5.3, 2.0.0rc0, 2.0.0rc1, 2.0.0, 2.0.1, 2.0.2, 2.0.3
@@ -0,0 +1 @@
1
+ __version__ = "1.2.71a3832.dev3"
@@ -8,6 +8,7 @@ from pandas.core.arrays.timedeltas import TimedeltaArray
8
8
  from pydantic import BaseModel, __version__ as pydantic_version
9
9
 
10
10
  from upgini.autofe.operator import PandasOperator, ParametrizedOperator
11
+ from upgini.autofe.utils import pydantic_validator
11
12
 
12
13
 
13
14
  def get_pydantic_version():
@@ -209,6 +210,14 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
209
210
 
210
211
  return cls(diff_unit=diff_unit, lower_bound=lower_bound, upper_bound=upper_bound, aggregation=aggregation)
211
212
 
213
+ def get_params(self) -> Dict[str, Optional[str]]:
214
+ res = super().get_params()
215
+ if self.lower_bound is not None:
216
+ res["lower_bound"] = str(self.lower_bound)
217
+ if self.upper_bound is not None:
218
+ res["upper_bound"] = str(self.upper_bound)
219
+ return res
220
+
212
221
  def _agg(self, x):
213
222
  x = x[
214
223
  (x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
@@ -269,32 +278,17 @@ class DatePercentile(DatePercentileBase):
269
278
  {
270
279
  "zero_month": self.zero_month,
271
280
  "zero_year": self.zero_year,
272
- "zero_bounds": self.zero_bounds,
281
+ "zero_bounds": json.dumps(self.zero_bounds),
273
282
  "step": self.step,
274
283
  }
275
284
  )
276
285
  return res
277
286
 
278
- # Check Pydantic version
279
- if get_pydantic_version() >= 2:
280
- # Use @field_validator for Pydantic 2.x
281
- from pydantic import field_validator
282
-
283
- @field_validator("zero_bounds", mode="before")
284
- def parse_zero_bounds(cls, value):
285
- if isinstance(value, str):
286
- return json.loads(value)
287
- return value
288
-
289
- else:
290
- # Use @validator for Pydantic 1.x
291
- from pydantic import validator
292
-
293
- @validator("zero_bounds", pre=True)
294
- def parse_zero_bounds(cls, value):
295
- if isinstance(value, str):
296
- return json.loads(value)
297
- return value
287
+ @pydantic_validator("zero_bounds", mode="before")
288
+ def parse_zero_bounds(cls, value):
289
+ if isinstance(value, str):
290
+ return json.loads(value)
291
+ return value
298
292
 
299
293
  def _get_bounds(self, date_col: pd.Series) -> pd.Series:
300
294
  months = date_col.dt.month
@@ -112,7 +112,11 @@ class Feature:
112
112
 
113
113
  def get_hash(self) -> str:
114
114
  return hashlib.sha256(
115
- "_".join([self.op.get_hash_component()] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
115
+ "_".join(
116
+ [self.op.get_hash_component()]
117
+ + [ch.op.get_hash_component() for ch in self.children if isinstance(ch, Feature)]
118
+ + [ch.get_display_name() for ch in self.children]
119
+ ).encode("utf-8")
116
120
  ).hexdigest()[:8]
117
121
 
118
122
  def set_alias(self, alias: str) -> "Feature":
@@ -1,15 +1,9 @@
1
1
  import abc
2
- from typing import Dict, List, Optional
2
+ from typing import Dict, List, Optional, Tuple
3
3
 
4
4
  import pandas as pd
5
5
  from upgini.autofe.operator import PandasOperator
6
6
 
7
- # Used in derived classes
8
- try:
9
- from pydantic import field_validator as validator # V2
10
- except ImportError:
11
- from pydantic import validator # V1
12
-
13
7
 
14
8
  class TimeSeriesBase(PandasOperator, abc.ABC):
15
9
  is_vector: bool = True
@@ -70,7 +64,7 @@ class TimeSeriesBase(PandasOperator, abc.ABC):
70
64
  return base_formula
71
65
 
72
66
  @classmethod
73
- def _parse_offset_from_formula(cls, formula: str, base_regex: str) -> tuple[Optional[dict], Optional[str]]:
67
+ def _parse_offset_from_formula(cls, formula: str, base_regex: str) -> Tuple[Optional[Dict], Optional[str]]:
74
68
  """
75
69
  Parse the offset component from a formula.
76
70
 
@@ -85,7 +79,7 @@ class TimeSeriesBase(PandasOperator, abc.ABC):
85
79
  """
86
80
  import re
87
81
 
88
- offset_regex = f"{base_regex}_offset_(\\d+)([a-zA-Z])"
82
+ offset_regex = f"{base_regex}_offset_(\\d+)([a-zA-Z])$"
89
83
  match = re.match(offset_regex, formula)
90
84
 
91
85
  if match:
@@ -1,16 +1,13 @@
1
+ import json
1
2
  from typing import Dict, List, Optional
2
3
 
3
4
  import numpy as np
4
5
  import pandas as pd
5
6
 
6
- try:
7
- from pydantic import field_validator as validator # V2
8
- except ImportError:
9
- from pydantic import validator # V1
10
-
11
7
  from upgini.autofe.all_operators import find_op
12
8
  from upgini.autofe.operator import PandasOperator, ParametrizedOperator
13
9
  from upgini.autofe.timeseries.base import TimeSeriesBase
10
+ from upgini.autofe.utils import pydantic_validator
14
11
 
15
12
 
16
13
  class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
@@ -20,11 +17,24 @@ class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
20
17
  left_descriptor: List[str] = []
21
18
  right_descriptor: List[str] = []
22
19
 
23
- @validator("descriptor_indices")
24
- @classmethod
20
+ @pydantic_validator("descriptor_indices", mode="before")
25
21
  def validate_descriptor_indices(cls, v):
22
+ if isinstance(v, str):
23
+ v = json.loads(v)
26
24
  if not v:
27
- raise ValueError("descriptor_indices cannot be empty for CrossSeriesInteraction")
25
+ raise ValueError("descriptor_indices cannot be empty")
26
+ return v
27
+
28
+ @pydantic_validator("left_descriptor", "right_descriptor", mode="before")
29
+ def parse_descriptors(cls, v):
30
+ if isinstance(v, str):
31
+ return json.loads(v)
32
+ return v
33
+
34
+ @pydantic_validator("interaction_op", mode="before")
35
+ def validate_interaction_op(cls, v):
36
+ if isinstance(v, str):
37
+ return find_op(v)
28
38
  return v
29
39
 
30
40
  def __init__(self, **data):
@@ -83,14 +93,14 @@ class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
83
93
 
84
94
  return cls(**params)
85
95
 
86
- def get_params(self) -> Dict[str, str | None]:
96
+ def get_params(self) -> Dict[str, Optional[str]]:
87
97
  res = super().get_params()
88
98
  res.update(
89
99
  {
90
100
  "interaction_op": self._get_interaction_op_name(),
91
- "descriptor_indices": self.descriptor_indices,
92
- "left_descriptor": self.left_descriptor,
93
- "right_descriptor": self.right_descriptor,
101
+ "descriptor_indices": json.dumps(self.descriptor_indices),
102
+ "left_descriptor": json.dumps(self.left_descriptor),
103
+ "right_descriptor": json.dumps(self.right_descriptor),
94
104
  }
95
105
  )
96
106
  return res
@@ -3,6 +3,7 @@ from typing import Dict, Optional
3
3
 
4
4
  from upgini.autofe.operator import ParametrizedOperator
5
5
  from upgini.autofe.timeseries.base import TimeSeriesBase
6
+ from upgini.autofe.utils import pydantic_validator
6
7
 
7
8
  # Roll aggregation functions
8
9
  roll_aggregations = {
@@ -12,19 +13,13 @@ roll_aggregations = {
12
13
  "iqr": lambda x: x.quantile(0.75) - x.quantile(0.25),
13
14
  }
14
15
 
15
- try:
16
- from pydantic import field_validator as validator # V2
17
- except ImportError:
18
- from pydantic import validator # V1
19
-
20
16
 
21
17
  class Roll(TimeSeriesBase, ParametrizedOperator):
22
18
  aggregation: str
23
19
  window_size: int = 1
24
20
  window_unit: str = "D"
25
21
 
26
- @validator("window_unit")
27
- @classmethod
22
+ @pydantic_validator("window_unit")
28
23
  def validate_window_unit(cls, v: str) -> str:
29
24
  try:
30
25
  pd.tseries.frequencies.to_offset(v)
@@ -2,10 +2,11 @@ from typing import Dict, Optional, Union
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
 
5
+ from upgini.autofe.operator import ParametrizedOperator
5
6
  from upgini.autofe.timeseries.base import TimeSeriesBase
6
7
 
7
8
 
8
- class TrendCoefficient(TimeSeriesBase):
9
+ class TrendCoefficient(TimeSeriesBase, ParametrizedOperator):
9
10
  name: str = "trend_coef"
10
11
  step_size: int = 1
11
12
  step_unit: str = "D"
@@ -0,0 +1,83 @@
1
+ """
2
+ Utility functions for autofe module.
3
+ """
4
+
5
+ import functools
6
+ from typing import Callable
7
+
8
+
9
+ def get_pydantic_version():
10
+ """
11
+ Get the major version of pydantic.
12
+
13
+ Returns:
14
+ int: Major version number (1 or 2)
15
+ """
16
+ try:
17
+ from pydantic import __version__ as pydantic_version
18
+
19
+ major_version = int(pydantic_version.split(".")[0])
20
+ return major_version
21
+ except (ImportError, ValueError):
22
+ # Default to version 1 if unable to determine
23
+ return 1
24
+
25
+
26
+ def pydantic_validator(field_name: str, *fields, mode: str = "before", **kwargs):
27
+ """
28
+ A decorator that applies the appropriate Pydantic validator based on the installed version.
29
+
30
+ This decorator handles the differences between Pydantic v1 and v2 validator syntax,
31
+ making it easier to write code that works with both versions.
32
+
33
+ Args:
34
+ field_name (str): The name of the field to validate
35
+ mode (str): The validation mode, either "before" or "after" (for Pydantic v2)
36
+ **kwargs: Additional arguments to pass to the validator
37
+
38
+ Returns:
39
+ Callable: A decorator that can be applied to validator methods
40
+
41
+ Example:
42
+ ```python
43
+ class MyModel(BaseModel):
44
+ items: List[int]
45
+
46
+ @pydantic_validator("items")
47
+ def parse_items(cls, value):
48
+ if isinstance(value, str):
49
+ return [int(x) for x in value.split(",")]
50
+ return value
51
+ ```
52
+ """
53
+ pydantic_version = get_pydantic_version()
54
+
55
+ if pydantic_version >= 2:
56
+ # Use field_validator for Pydantic 2.x
57
+ from pydantic import field_validator
58
+
59
+ def decorator(func: Callable) -> Callable:
60
+ @field_validator(field_name, *fields, mode=mode, **kwargs)
61
+ @functools.wraps(func)
62
+ def wrapper(cls, value, **kw):
63
+ return func(cls, value)
64
+
65
+ return wrapper
66
+
67
+ return decorator
68
+ else:
69
+ # Use validator for Pydantic 1.x
70
+ from pydantic import validator
71
+
72
+ # Map mode to Pydantic v1 parameters
73
+ pre = True if mode == "before" else False
74
+
75
+ def decorator(func: Callable) -> Callable:
76
+ @validator(field_name, *fields, pre=pre, **kwargs)
77
+ @functools.wraps(func)
78
+ def wrapper(cls, value, **kw):
79
+ return func(cls, value)
80
+
81
+ return wrapper
82
+
83
+ return decorator
@@ -22,6 +22,7 @@ from upgini.metadata import (
22
22
  EVAL_SET_INDEX,
23
23
  SYSTEM_RECORD_ID,
24
24
  TARGET,
25
+ AutoFEParameters,
25
26
  CVType,
26
27
  DataType,
27
28
  FeaturesFilter,
@@ -558,6 +559,7 @@ class Dataset: # (pd.DataFrame):
558
559
  filter_features: Optional[dict] = None,
559
560
  runtime_parameters: Optional[RuntimeParameters] = None,
560
561
  metrics_calculation: Optional[bool] = False,
562
+ auto_fe_parameters: Optional[AutoFEParameters] = None,
561
563
  ) -> SearchCustomization:
562
564
  # self.logger.info("Constructing search customization")
563
565
  search_customization = SearchCustomization(
@@ -585,7 +587,10 @@ class Dataset: # (pd.DataFrame):
585
587
  search_customization.featuresFilter = feature_filter
586
588
 
587
589
  search_customization.runtimeParameters.properties["etalon_imbalanced"] = self.imbalanced
588
-
590
+ if auto_fe_parameters is not None:
591
+ search_customization.runtimeParameters.properties["feature_generation_params.ts.gap_days"] = (
592
+ auto_fe_parameters.ts_gap_days
593
+ )
589
594
  return search_customization
590
595
 
591
596
  def _rename_generate_features(self, runtime_parameters: Optional[RuntimeParameters]) -> Optional[RuntimeParameters]:
@@ -640,6 +645,7 @@ class Dataset: # (pd.DataFrame):
640
645
  max_features: Optional[int] = None, # deprecated
641
646
  filter_features: Optional[dict] = None, # deprecated
642
647
  runtime_parameters: Optional[RuntimeParameters] = None,
648
+ auto_fe_parameters: Optional[AutoFEParameters] = None,
643
649
  force_downsampling: bool = False,
644
650
  ) -> SearchTask:
645
651
  if self.etalon_def is None:
@@ -658,6 +664,7 @@ class Dataset: # (pd.DataFrame):
658
664
  max_features=max_features,
659
665
  filter_features=filter_features,
660
666
  runtime_parameters=runtime_parameters,
667
+ auto_fe_parameters=auto_fe_parameters,
661
668
  )
662
669
 
663
670
  if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(