upgini 1.2.69__py3-none-any.whl → 1.2.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release — flagged by automated registry analysis.


This version of upgini might be problematic; consult the registry's advisory page for this release for details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.69"
1
+ __version__ = "1.2.70"
upgini/autofe/date.py CHANGED
@@ -8,6 +8,7 @@ from pandas.core.arrays.timedeltas import TimedeltaArray
8
8
  from pydantic import BaseModel, __version__ as pydantic_version
9
9
 
10
10
  from upgini.autofe.operator import PandasOperator, ParametrizedOperator
11
+ from upgini.autofe.utils import pydantic_validator
11
12
 
12
13
 
13
14
  def get_pydantic_version():
@@ -209,6 +210,14 @@ class DateListDiffBounded(DateListDiff, ParametrizedOperator):
209
210
 
210
211
  return cls(diff_unit=diff_unit, lower_bound=lower_bound, upper_bound=upper_bound, aggregation=aggregation)
211
212
 
213
+ def get_params(self) -> Dict[str, Optional[str]]:
214
+ res = super().get_params()
215
+ if self.lower_bound is not None:
216
+ res["lower_bound"] = str(self.lower_bound)
217
+ if self.upper_bound is not None:
218
+ res["upper_bound"] = str(self.upper_bound)
219
+ return res
220
+
212
221
  def _agg(self, x):
213
222
  x = x[
214
223
  (x >= (self.lower_bound if self.lower_bound is not None else -np.inf))
@@ -269,32 +278,17 @@ class DatePercentile(DatePercentileBase):
269
278
  {
270
279
  "zero_month": self.zero_month,
271
280
  "zero_year": self.zero_year,
272
- "zero_bounds": self.zero_bounds,
281
+ "zero_bounds": json.dumps(self.zero_bounds),
273
282
  "step": self.step,
274
283
  }
275
284
  )
276
285
  return res
277
286
 
278
- # Check Pydantic version
279
- if get_pydantic_version() >= 2:
280
- # Use @field_validator for Pydantic 2.x
281
- from pydantic import field_validator
282
-
283
- @field_validator("zero_bounds", mode="before")
284
- def parse_zero_bounds(cls, value):
285
- if isinstance(value, str):
286
- return json.loads(value)
287
- return value
288
-
289
- else:
290
- # Use @validator for Pydantic 1.x
291
- from pydantic import validator
292
-
293
- @validator("zero_bounds", pre=True)
294
- def parse_zero_bounds(cls, value):
295
- if isinstance(value, str):
296
- return json.loads(value)
297
- return value
287
+ @pydantic_validator("zero_bounds", mode="before")
288
+ def parse_zero_bounds(cls, value):
289
+ if isinstance(value, str):
290
+ return json.loads(value)
291
+ return value
298
292
 
299
293
  def _get_bounds(self, date_col: pd.Series) -> pd.Series:
300
294
  months = date_col.dt.month
upgini/autofe/feature.py CHANGED
@@ -112,7 +112,11 @@ class Feature:
112
112
 
113
113
  def get_hash(self) -> str:
114
114
  return hashlib.sha256(
115
- "_".join([self.op.get_hash_component()] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
115
+ "_".join(
116
+ [self.op.get_hash_component()]
117
+ + [ch.op.get_hash_component() for ch in self.children if isinstance(ch, Feature)]
118
+ + [ch.get_display_name() for ch in self.children]
119
+ ).encode("utf-8")
116
120
  ).hexdigest()[:8]
117
121
 
118
122
  def set_alias(self, alias: str) -> "Feature":
@@ -4,12 +4,6 @@ from typing import Dict, List, Optional
4
4
  import pandas as pd
5
5
  from upgini.autofe.operator import PandasOperator
6
6
 
7
- # Used in derived classes
8
- try:
9
- from pydantic import field_validator as validator # V2
10
- except ImportError:
11
- from pydantic import validator # V1
12
-
13
7
 
14
8
  class TimeSeriesBase(PandasOperator, abc.ABC):
15
9
  is_vector: bool = True
@@ -85,7 +79,7 @@ class TimeSeriesBase(PandasOperator, abc.ABC):
85
79
  """
86
80
  import re
87
81
 
88
- offset_regex = f"{base_regex}_offset_(\\d+)([a-zA-Z])"
82
+ offset_regex = f"{base_regex}_offset_(\\d+)([a-zA-Z])$"
89
83
  match = re.match(offset_regex, formula)
90
84
 
91
85
  if match:
@@ -1,16 +1,13 @@
1
+ import json
1
2
  from typing import Dict, List, Optional
2
3
 
3
4
  import numpy as np
4
5
  import pandas as pd
5
6
 
6
- try:
7
- from pydantic import field_validator as validator # V2
8
- except ImportError:
9
- from pydantic import validator # V1
10
-
11
7
  from upgini.autofe.all_operators import find_op
12
8
  from upgini.autofe.operator import PandasOperator, ParametrizedOperator
13
9
  from upgini.autofe.timeseries.base import TimeSeriesBase
10
+ from upgini.autofe.utils import pydantic_validator
14
11
 
15
12
 
16
13
  class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
@@ -20,11 +17,24 @@ class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
20
17
  left_descriptor: List[str] = []
21
18
  right_descriptor: List[str] = []
22
19
 
23
- @validator("descriptor_indices")
24
- @classmethod
20
+ @pydantic_validator("descriptor_indices", mode="before")
25
21
  def validate_descriptor_indices(cls, v):
22
+ if isinstance(v, str):
23
+ v = json.loads(v)
26
24
  if not v:
27
- raise ValueError("descriptor_indices cannot be empty for CrossSeriesInteraction")
25
+ raise ValueError("descriptor_indices cannot be empty")
26
+ return v
27
+
28
+ @pydantic_validator("left_descriptor", "right_descriptor", mode="before")
29
+ def parse_descriptors(cls, v):
30
+ if isinstance(v, str):
31
+ return json.loads(v)
32
+ return v
33
+
34
+ @pydantic_validator("interaction_op", mode="before")
35
+ def validate_interaction_op(cls, v):
36
+ if isinstance(v, str):
37
+ return find_op(v)
28
38
  return v
29
39
 
30
40
  def __init__(self, **data):
@@ -88,9 +98,9 @@ class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
88
98
  res.update(
89
99
  {
90
100
  "interaction_op": self._get_interaction_op_name(),
91
- "descriptor_indices": self.descriptor_indices,
92
- "left_descriptor": self.left_descriptor,
93
- "right_descriptor": self.right_descriptor,
101
+ "descriptor_indices": json.dumps(self.descriptor_indices),
102
+ "left_descriptor": json.dumps(self.left_descriptor),
103
+ "right_descriptor": json.dumps(self.right_descriptor),
94
104
  }
95
105
  )
96
106
  return res
@@ -3,6 +3,7 @@ from typing import Dict, Optional
3
3
 
4
4
  from upgini.autofe.operator import ParametrizedOperator
5
5
  from upgini.autofe.timeseries.base import TimeSeriesBase
6
+ from upgini.autofe.utils import pydantic_validator
6
7
 
7
8
  # Roll aggregation functions
8
9
  roll_aggregations = {
@@ -12,19 +13,13 @@ roll_aggregations = {
12
13
  "iqr": lambda x: x.quantile(0.75) - x.quantile(0.25),
13
14
  }
14
15
 
15
- try:
16
- from pydantic import field_validator as validator # V2
17
- except ImportError:
18
- from pydantic import validator # V1
19
-
20
16
 
21
17
  class Roll(TimeSeriesBase, ParametrizedOperator):
22
18
  aggregation: str
23
19
  window_size: int = 1
24
20
  window_unit: str = "D"
25
21
 
26
- @validator("window_unit")
27
- @classmethod
22
+ @pydantic_validator("window_unit")
28
23
  def validate_window_unit(cls, v: str) -> str:
29
24
  try:
30
25
  pd.tseries.frequencies.to_offset(v)
@@ -2,10 +2,11 @@ from typing import Dict, Optional, Union
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
 
5
+ from upgini.autofe.operator import ParametrizedOperator
5
6
  from upgini.autofe.timeseries.base import TimeSeriesBase
6
7
 
7
8
 
8
- class TrendCoefficient(TimeSeriesBase):
9
+ class TrendCoefficient(TimeSeriesBase, ParametrizedOperator):
9
10
  name: str = "trend_coef"
10
11
  step_size: int = 1
11
12
  step_unit: str = "D"
upgini/autofe/utils.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ Utility functions for autofe module.
3
+ """
4
+
5
+ import functools
6
+ from typing import Callable
7
+
8
+
9
+ def get_pydantic_version():
10
+ """
11
+ Get the major version of pydantic.
12
+
13
+ Returns:
14
+ int: Major version number (1 or 2)
15
+ """
16
+ try:
17
+ from pydantic import __version__ as pydantic_version
18
+
19
+ major_version = int(pydantic_version.split(".")[0])
20
+ return major_version
21
+ except (ImportError, ValueError):
22
+ # Default to version 1 if unable to determine
23
+ return 1
24
+
25
+
26
+ def pydantic_validator(field_name: str, *fields, mode: str = "before", **kwargs):
27
+ """
28
+ A decorator that applies the appropriate Pydantic validator based on the installed version.
29
+
30
+ This decorator handles the differences between Pydantic v1 and v2 validator syntax,
31
+ making it easier to write code that works with both versions.
32
+
33
+ Args:
34
+ field_name (str): The name of the field to validate
35
+ mode (str): The validation mode, either "before" or "after" (for Pydantic v2)
36
+ **kwargs: Additional arguments to pass to the validator
37
+
38
+ Returns:
39
+ Callable: A decorator that can be applied to validator methods
40
+
41
+ Example:
42
+ ```python
43
+ class MyModel(BaseModel):
44
+ items: List[int]
45
+
46
+ @pydantic_validator("items")
47
+ def parse_items(cls, value):
48
+ if isinstance(value, str):
49
+ return [int(x) for x in value.split(",")]
50
+ return value
51
+ ```
52
+ """
53
+ pydantic_version = get_pydantic_version()
54
+
55
+ if pydantic_version >= 2:
56
+ # Use field_validator for Pydantic 2.x
57
+ from pydantic import field_validator
58
+
59
+ def decorator(func: Callable) -> Callable:
60
+ @field_validator(field_name, *fields, mode=mode, **kwargs)
61
+ @functools.wraps(func)
62
+ def wrapper(cls, value, **kw):
63
+ return func(cls, value)
64
+
65
+ return wrapper
66
+
67
+ return decorator
68
+ else:
69
+ # Use validator for Pydantic 1.x
70
+ from pydantic import validator
71
+
72
+ # Map mode to Pydantic v1 parameters
73
+ pre = True if mode == "before" else False
74
+
75
+ def decorator(func: Callable) -> Callable:
76
+ @validator(field_name, *fields, pre=pre, **kwargs)
77
+ @functools.wraps(func)
78
+ def wrapper(cls, value, **kw):
79
+ return func(cls, value)
80
+
81
+ return wrapper
82
+
83
+ return decorator
upgini/dataset.py CHANGED
@@ -22,6 +22,7 @@ from upgini.metadata import (
22
22
  EVAL_SET_INDEX,
23
23
  SYSTEM_RECORD_ID,
24
24
  TARGET,
25
+ AutoFEParameters,
25
26
  CVType,
26
27
  DataType,
27
28
  FeaturesFilter,
@@ -558,6 +559,7 @@ class Dataset: # (pd.DataFrame):
558
559
  filter_features: Optional[dict] = None,
559
560
  runtime_parameters: Optional[RuntimeParameters] = None,
560
561
  metrics_calculation: Optional[bool] = False,
562
+ auto_fe_parameters: Optional[AutoFEParameters] = None,
561
563
  ) -> SearchCustomization:
562
564
  # self.logger.info("Constructing search customization")
563
565
  search_customization = SearchCustomization(
@@ -585,7 +587,10 @@ class Dataset: # (pd.DataFrame):
585
587
  search_customization.featuresFilter = feature_filter
586
588
 
587
589
  search_customization.runtimeParameters.properties["etalon_imbalanced"] = self.imbalanced
588
-
590
+ if auto_fe_parameters is not None:
591
+ search_customization.runtimeParameters.properties["feature_generation_params.ts.gap_days"] = (
592
+ auto_fe_parameters.ts_gap_days
593
+ )
589
594
  return search_customization
590
595
 
591
596
  def _rename_generate_features(self, runtime_parameters: Optional[RuntimeParameters]) -> Optional[RuntimeParameters]:
@@ -640,6 +645,7 @@ class Dataset: # (pd.DataFrame):
640
645
  max_features: Optional[int] = None, # deprecated
641
646
  filter_features: Optional[dict] = None, # deprecated
642
647
  runtime_parameters: Optional[RuntimeParameters] = None,
648
+ auto_fe_parameters: Optional[AutoFEParameters] = None,
643
649
  force_downsampling: bool = False,
644
650
  ) -> SearchTask:
645
651
  if self.etalon_def is None:
@@ -658,6 +664,7 @@ class Dataset: # (pd.DataFrame):
658
664
  max_features=max_features,
659
665
  filter_features=filter_features,
660
666
  runtime_parameters=runtime_parameters,
667
+ auto_fe_parameters=auto_fe_parameters,
661
668
  )
662
669
 
663
670
  if self.file_upload_id is not None and self.rest_client.check_uploaded_file_v2(