upgini 1.2.62a3818.dev2__py3-none-any.whl → 1.2.62a3818.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.62a3818.dev2"
1
+ __version__ = "1.2.62a3818.dev4"
@@ -7,4 +7,4 @@ from upgini.autofe.vector import * # noqa
7
7
 
8
8
 
9
9
  def find_op(name):
10
- return OperatorRegistry.get_operand(name)
10
+ return OperatorRegistry.get_operator(name)
upgini/autofe/operator.py CHANGED
@@ -20,23 +20,25 @@ class OperatorRegistry(type(BaseModel)):
20
20
  base_names.update(b.__name__ for b in base.__bases__)
21
21
  base_classes.extend(base.__bases__)
22
22
 
23
- if "Operand" in base_names:
23
+ if "Operator" in base_names:
24
24
  # Track parametrized operands separately
25
- if "ParametrizedOperand" in base_names:
25
+ if "ParametrizedOperator" in base_names:
26
26
  cls._parametrized_registry.append(new_class)
27
27
  else:
28
28
  try:
29
29
  instance = new_class()
30
30
  cls._registry[instance.name] = new_class
31
+ if instance.alias:
32
+ cls._registry[instance.alias] = new_class
31
33
  except Exception:
32
34
  pass
33
35
  return new_class
34
36
 
35
37
  @classmethod
36
- def get_operand(cls, name: str) -> Optional["Operator"]:
38
+ def get_operator(cls, name: str) -> Optional["Operator"]:
37
39
  # First try to resolve as a parametrized operand formula
38
- for operand_cls in cls._parametrized_registry:
39
- resolved = operand_cls.from_formula(name)
40
+ for operator_cls in cls._parametrized_registry:
41
+ resolved = operator_cls.from_formula(name)
40
42
  if resolved is not None:
41
43
  return resolved
42
44
  # Fall back to direct registry lookup
@@ -0,0 +1,23 @@
1
+ """Time series feature engineering operators."""
2
+
3
+ from upgini.autofe.timeseries.base import TimeSeriesBase
4
+ from upgini.autofe.timeseries.roll import Roll
5
+ from upgini.autofe.timeseries.lag import Lag
6
+ from upgini.autofe.timeseries.delta import Delta, Delta2
7
+ from upgini.autofe.timeseries.trend import TrendCoefficient
8
+ from upgini.autofe.timeseries.volatility import EWMAVolatility, RollingVolatility, RollingVolatility2, VolatilityRatio
9
+ from upgini.autofe.timeseries.cross import CrossSeriesInteraction
10
+
11
+ __all__ = [
12
+ "TimeSeriesBase",
13
+ "Roll",
14
+ "Lag",
15
+ "Delta",
16
+ "Delta2",
17
+ "TrendCoefficient",
18
+ "EWMAVolatility",
19
+ "RollingVolatility",
20
+ "RollingVolatility2",
21
+ "VolatilityRatio",
22
+ "CrossSeriesInteraction",
23
+ ]
@@ -0,0 +1,105 @@
1
+ import abc
2
+ from typing import Dict, List, Optional
3
+
4
+ import pandas as pd
5
+ from upgini.autofe.operator import PandasOperator
6
+
7
+ # Used in derived classes
8
+ try:
9
+ from pydantic import field_validator as validator # V2
10
+ except ImportError:
11
+ from pydantic import validator # V1
12
+
13
+
14
class TimeSeriesBase(PandasOperator, abc.ABC):
    """Abstract base for time-series operators.

    Subclasses implement ``_aggregate``; this class prepares a date-indexed
    frame (optionally grouped and shifted by an offset), delegates to
    ``_aggregate`` and aligns the result back to the input rows.
    """

    is_vector: bool = True
    # Passed as ``unit=`` to ``pd.to_datetime`` when parsing the date column.
    date_unit: Optional[str] = None
    # Offset applied to the value column before aggregation; 0 disables it.
    offset_size: int = 0
    offset_unit: str = "D"

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters extended with date unit and offset settings."""
        res = super().get_params()
        res.update(
            {
                "date_unit": self.date_unit,
                "offset_size": self.offset_size,
                "offset_unit": self.offset_unit,
            }
        )
        return res

    def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
        """Compute the feature for every input row.

        Args:
            data: Series list; the first is assumed to be the date, the last
                the value, and everything in between group columns.

        Returns:
            The last column of the aggregated result, re-indexed to the index
            of the input date series.
        """
        # assuming first is date, last is value, rest is group columns
        date = pd.to_datetime(data[0], unit=self.date_unit, errors="coerce")
        ts = pd.concat([date] + data[1:], axis=1)
        # Keep one value per (date, group...) combination.
        ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
        ts.set_index(date.name, inplace=True)
        # Rows whose date failed to parse (NaT) are excluded from the series.
        ts = ts[ts.index.notna()].sort_index()
        # With group columns present, shift per group and hand a grouped,
        # date-indexed frame to ``_aggregate``; otherwise shift the plain frame.
        ts = (
            ts.groupby([c.name for c in data[1:-1]], group_keys=True)
            .apply(self._shift)[data[-1].name]
            .to_frame()
            .reset_index()
            .set_index(date.name)
            .groupby([c.name for c in data[1:-1]], group_keys=True)
            if len(data) > 2
            else self._shift(ts)
        )
        ts = self._aggregate(ts)
        # Align the aggregated values back to the original rows (by group keys
        # and date, or by date only), then restore the original row index.
        ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
        ts.index = date.index

        return ts.iloc[:, -1]

    def _shift(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Shift the index of the last column by the configured offset.

        When ``offset_size`` is positive, the last column is shifted with
        ``shift(freq=...)`` and merged back onto the remaining columns by
        index; otherwise ``ts`` is returned unchanged.
        """
        if self.offset_size > 0:
            return ts.iloc[:, :-1].merge(
                ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
                left_index=True,
                right_index=True,
            )
        return ts

    @abc.abstractmethod
    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Compute the operator-specific aggregation over the prepared series."""
        pass

    def _add_offset_to_formula(self, base_formula: str) -> str:
        """Append ``_offset_<size><unit>`` to ``base_formula`` when an offset is set."""
        if self.offset_size > 0:
            return f"{base_formula}_offset_{self.offset_size}{self.offset_unit}"
        return base_formula

    @classmethod
    def _parse_offset_from_formula(cls, formula: str, base_regex: str) -> tuple[Optional[dict], Optional[str]]:
        """
        Parse the offset component from a formula.

        The offset suffix must terminate the formula: trailing text after the
        unit is not silently ignored, and multi-letter units (the form written
        by ``_add_offset_to_formula`` for units such as ``min``) are captured
        in full instead of being cut to their first letter.

        Args:
            formula: The formula to parse
            base_regex: The regex pattern for the base formula (without offset)

        Returns:
            A tuple with:
            - Dictionary with offset parameters if found, None otherwise
            - Remaining part of the formula after removing offset component (for further parsing),
              or None when the formula does not start with the base pattern
        """
        import re

        # Anchored on both ends so the offset is only recognised as a suffix.
        offset_regex = f"^{base_regex}_offset_(\\d+)([a-zA-Z]+)$"
        match = re.match(offset_regex, formula)

        if match:
            # The two offset groups are always the last groups of the pattern,
            # whatever number of groups ``base_regex`` itself defines.
            size_group = match.lastindex - 1
            offset_size = int(match.group(size_group))
            offset_unit = match.group(match.lastindex)

            # Everything before the "_offset_" marker is the base formula.
            base_formula = formula[: match.start(size_group) - len("_offset_")]
            return {"offset_size": offset_size, "offset_unit": offset_unit}, base_formula

        # Check if it matches the base regex (no offset)
        if re.match(f"^{base_regex}$", formula) or re.match(f"^{base_regex}_", formula):
            return None, formula

        return None, None
@@ -0,0 +1,130 @@
1
+ from typing import Dict, List, Optional
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ try:
7
+ from pydantic import field_validator as validator # V2
8
+ except ImportError:
9
+ from pydantic import validator # V1
10
+
11
+ from upgini.autofe.all_operands import find_op
12
+ from upgini.autofe.operator import PandasOperator, ParametrizedOperator
13
+ from upgini.autofe.timeseries.base import TimeSeriesBase
14
+
15
+
16
class CrossSeriesInteraction(TimeSeriesBase, ParametrizedOperator):
    """Combine two series selected by descriptor values using a binary operator.

    Rows are selected by comparing the columns at ``descriptor_indices`` with
    ``left_descriptor`` / ``right_descriptor``; the two selected series are
    then passed to ``interaction_op.calculate_binary``.
    """

    base_name: str = "cross"
    interaction_op: PandasOperator
    # Positions (within the ``data`` list) of the columns that identify a series.
    descriptor_indices: List[int] = []
    # Values the descriptor columns must equal for the left / right series.
    left_descriptor: List[str] = []
    right_descriptor: List[str] = []

    @validator("descriptor_indices")
    @classmethod
    def validate_descriptor_indices(cls, v):
        """Reject an explicitly supplied empty ``descriptor_indices``."""
        if not v:
            raise ValueError("descriptor_indices cannot be empty for CrossSeriesInteraction")
        return v

    def __init__(self, **data):
        """Build the operator and check that both descriptors match the indices in length.

        Raises:
            ValueError: If ``left_descriptor`` or ``right_descriptor`` has a
                different length than ``descriptor_indices``.
        """
        super().__init__(**data)
        indices = self.descriptor_indices
        left = self.left_descriptor
        right = self.right_descriptor

        if len(left) != len(indices):
            raise ValueError(
                f"left_descriptor length ({len(left)}) must match descriptor_indices length ({len(indices)})"
            )

        if len(right) != len(indices):
            raise ValueError(
                f"right_descriptor length ({len(right)}) must match descriptor_indices length ({len(indices)})"
            )

    def to_formula(self) -> str:
        """Return ``cross_<op>`` with the optional offset suffix."""
        base_formula = f"{self.base_name}_{self._get_interaction_op_name()}"
        return self._add_offset_to_formula(base_formula)

    @classmethod
    def from_formula(cls, formula: str) -> Optional["CrossSeriesInteraction"]:
        """Parse ``cross_<op>[_offset_<n><unit>]``.

        Returns:
            An instance with placeholder descriptors, or None when the formula
            does not match or ``<op>`` does not resolve to a binary operator.
        """
        base_regex = r"cross_(.+)"

        offset_params, remaining_formula = cls._parse_offset_from_formula(formula, base_regex)

        if remaining_formula is None:
            return None

        import re

        match = re.match(f"^{base_regex}$", remaining_formula)

        if not match:
            return None

        # Extract the operator formula
        op_formula = match.group(1)

        op = find_op(op_formula)
        if op is None or not op.is_binary:
            return None

        # Include default values to pass validation
        params = {
            "interaction_op": op,
            "descriptor_indices": [0],  # Default index
            "left_descriptor": ["default"],  # Default left descriptor
            "right_descriptor": ["default"],  # Default right descriptor
        }

        if offset_params:
            params.update(offset_params)

        return cls(**params)

    # Annotated with Optional[...] like the sibling operators: ``str | None``
    # is evaluated when the method is defined and fails before Python 3.10.
    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the interaction operator name and descriptors."""
        res = super().get_params()
        res.update(
            {
                "interaction_op": self._get_interaction_op_name(),
                "descriptor_indices": self.descriptor_indices,
                "left_descriptor": self.left_descriptor,
                "right_descriptor": self.right_descriptor,
            }
        )
        return res

    def _get_interaction_op_name(self) -> str:
        """Return the operator's alias if set, otherwise its formula."""
        return self.interaction_op.alias or self.interaction_op.to_formula()

    def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
        """Compute the interaction and write it onto the rows of both series.

        Rows matching neither descriptor are left as NaN.
        """
        left_mask = self._get_mask(data, self.left_descriptor)
        left = self._extract_series(data, left_mask)

        right_mask = self._get_mask(data, self.right_descriptor)
        right = self._extract_series(data, right_mask)

        interaction: pd.Series = self.interaction_op.calculate_binary(left, right)
        # Expand the result back to one value per input row.
        interaction = interaction.reindex(self._get_index(data))
        res = pd.Series(np.nan, index=data[-1].index, name=data[-1].name)
        res.loc[left_mask] = interaction[left_mask].values
        res.loc[right_mask] = interaction[right_mask].values
        return res

    def _get_mask(self, data: List[pd.Series], descriptor: List[str]) -> pd.Series:
        """Return a boolean mask of rows whose descriptor columns all equal ``descriptor``."""
        mask = np.logical_and.reduce([data[i] == v for i, v in zip(self.descriptor_indices, descriptor)])
        return mask

    def _extract_series(self, data: List[pd.Series], mask: pd.Series) -> pd.Series:
        """Run the base time-series calculation on the masked rows and index it by the non-descriptor columns."""
        masked_data = [d[mask] for d in data]
        shifted = super().calculate_vector(masked_data)
        shifted.index = self._get_index(masked_data)
        return shifted

    def _get_index(self, data: List[pd.Series]) -> pd.Series:
        """Return the non-descriptor, non-value columns (a single Series when only one remains)."""
        index = [d for i, d in enumerate(data[:-1]) if i not in self.descriptor_indices]
        return index if len(index) > 1 else index[0]

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Pass the values through unchanged, keeping only the last column."""
        return ts.apply(lambda x: x).iloc[:, [-1]]
@@ -0,0 +1,125 @@
1
+ import pandas as pd
2
+ from typing import Dict, Optional
3
+
4
+ from upgini.autofe.operator import ParametrizedOperator
5
+ from upgini.autofe.timeseries.base import TimeSeriesBase
6
+ from upgini.autofe.timeseries.lag import Lag
7
+
8
+
9
class Delta(TimeSeriesBase, ParametrizedOperator):
    """Difference between the current value and the value ``delta_size`` units earlier."""

    delta_size: int
    delta_unit: str = "D"

    def to_formula(self) -> str:
        """Return ``delta_<size><unit>`` with the optional offset suffix."""
        return self._add_offset_to_formula(f"delta_{self.delta_size}{self.delta_unit}")

    @classmethod
    def from_formula(cls, formula: str) -> Optional["Delta"]:
        """Parse ``delta_<size><unit>[_offset_<n><unit>]``; return None if it does not match."""
        import re

        pattern = r"delta_(\d+)([a-zA-Z])"

        # The offset suffix (if any) is split off before the delta part is parsed.
        offset_kwargs, base_part = cls._parse_offset_from_formula(formula, pattern)
        if base_part is None:
            return None

        parsed = re.match(f"^{pattern}$", base_part)
        if not parsed:
            return None

        size, unit = parsed.groups()
        kwargs = {"delta_size": int(size), "delta_unit": unit}
        if offset_kwargs:
            kwargs.update(offset_kwargs)

        return cls(**kwargs)

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the delta and offset settings."""
        params = super().get_params()
        own = {
            "delta_size": self.delta_size,
            "delta_unit": self.delta_unit,
            "offset_size": self.offset_size,
            "offset_unit": self.offset_unit,
        }
        params.update(own)
        return params

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Subtract the lagged series from the zero-lag series."""
        current = Lag(lag_size=0, lag_unit=self.delta_unit)
        previous = Lag(lag_size=self.delta_size, lag_unit=self.delta_unit)
        return current._aggregate(ts) - previous._aggregate(ts)
66
+
67
+
68
class Delta2(TimeSeriesBase, ParametrizedOperator):
    """Delta applied twice: the delta of the delta over ``delta_size`` units."""

    delta_size: int
    delta_unit: str = "D"

    def to_formula(self) -> str:
        """Return ``delta2_<size><unit>`` with the optional offset suffix."""
        return self._add_offset_to_formula(f"delta2_{self.delta_size}{self.delta_unit}")

    @classmethod
    def from_formula(cls, formula: str) -> Optional["Delta2"]:
        """Parse ``delta2_<size><unit>[_offset_<n><unit>]``; return None if it does not match."""
        import re

        pattern = r"delta2_(\d+)([a-zA-Z])"

        # The offset suffix (if any) is split off before the delta part is parsed.
        offset_kwargs, base_part = cls._parse_offset_from_formula(formula, pattern)
        if base_part is None:
            return None

        parsed = re.match(f"^{pattern}$", base_part)
        if not parsed:
            return None

        size, unit = parsed.groups()
        kwargs = {"delta_size": int(size), "delta_unit": unit}
        if offset_kwargs:
            kwargs.update(offset_kwargs)

        return cls(**kwargs)

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the delta size and unit."""
        params = super().get_params()
        params.update({"delta_size": self.delta_size, "delta_unit": self.delta_unit})
        return params

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Apply the same ``Delta`` aggregation to ``ts`` and then to its result."""
        delta_op = Delta(delta_size=self.delta_size, delta_unit=self.delta_unit)
        # Second application works on the output of the first one.
        return delta_op._aggregate(delta_op._aggregate(ts))
@@ -0,0 +1,68 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import Dict, Optional
4
+
5
+ from upgini.autofe.operator import ParametrizedOperator
6
+ from upgini.autofe.timeseries.base import TimeSeriesBase
7
+
8
+
9
class Lag(TimeSeriesBase, ParametrizedOperator):
    """Value of the series ``lag_size`` units of ``lag_unit`` before the current row."""

    lag_size: int
    lag_unit: str = "D"

    def to_formula(self) -> str:
        """Return ``lag_<size><unit>`` with the optional offset suffix."""
        base_formula = f"lag_{self.lag_size}{self.lag_unit}"
        return self._add_offset_to_formula(base_formula)

    @classmethod
    def from_formula(cls, formula: str) -> Optional["Lag"]:
        """Parse ``lag_<size><unit>[_offset_<n><unit>]``; return None if it does not match."""
        # Base regex for Lag class
        base_regex = r"lag_(\d+)([a-zA-Z])"

        # Parse offset first
        offset_params, remaining_formula = cls._parse_offset_from_formula(formula, base_regex)

        if remaining_formula is None:
            return None

        # Now parse the lag part
        import re

        match = re.match(f"^{base_regex}$", remaining_formula)

        if not match:
            return None

        lag_size = int(match.group(1))
        lag_unit = match.group(2)

        # Create instance with appropriate parameters
        params = {
            "lag_size": lag_size,
            "lag_unit": lag_unit,
        }

        if offset_params:
            params.update(offset_params)

        return cls(**params)

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the lag size and unit."""
        res = super().get_params()
        res.update(
            {
                "lag_size": self.lag_size,
                "lag_unit": self.lag_unit,
            }
        )
        return res

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Roll a time window of ``lag_size + 1`` units and reduce each window with ``_lag``."""
        lag_window = self.lag_size + 1
        return ts.rolling(f"{lag_window}{self.lag_unit}", min_periods=1).agg(self._lag)

    def _lag(self, x):
        """Return the first value of the window, or NaN when the window spans less than the lag."""
        if x.index.min() > (x.index.max() - pd.Timedelta(self.lag_size, self.lag_unit)):
            return np.nan
        else:
            # Explicit positional access: ``x[0]`` on a datetime-indexed Series
            # relies on the deprecated integer-key positional fallback.
            return x.iloc[0]
@@ -0,0 +1,92 @@
1
+ import pandas as pd
2
+ from typing import Dict, Optional
3
+
4
+ from upgini.autofe.operator import ParametrizedOperator
5
+ from upgini.autofe.timeseries.base import TimeSeriesBase
6
+
7
+ # Roll aggregation functions
8
roll_aggregations = {
    # Last value of the window divided by the window mean. ``.iloc[-1]`` is
    # used because ``x[-1]`` on a non-integer index relies on the deprecated
    # positional fallback of ``Series.__getitem__``.
    "norm_mean": lambda x: x.iloc[-1] / x.mean(),
    # Lower and upper quartiles of the window.
    "q25": lambda x: x.quantile(0.25),
    "q75": lambda x: x.quantile(0.75),
    # Interquartile range of the window.
    "iqr": lambda x: x.quantile(0.75) - x.quantile(0.25),
}
14
+
15
+ try:
16
+ from pydantic import field_validator as validator # V2
17
+ except ImportError:
18
+ from pydantic import validator # V1
19
+
20
+
21
class Roll(TimeSeriesBase, ParametrizedOperator):
    """Rolling-window aggregation over a time window of ``window_size`` ``window_unit``."""

    aggregation: str
    window_size: int = 1
    window_unit: str = "D"

    @validator("window_unit")
    @classmethod
    def validate_window_unit(cls, v: str) -> str:
        """Accept ``v`` only if pandas can turn it into a frequency offset."""
        try:
            pd.tseries.frequencies.to_offset(v)
        except ValueError:
            raise ValueError(
                f"Invalid window_unit: {v}. Must be a valid pandas frequency string (e.g. 'D', 'H', 'T', etc)"
            )
        else:
            return v

    def to_formula(self) -> str:
        """Return ``roll_<size><unit>_<aggregation>`` with the optional offset suffix."""
        # Order of the parts: window, aggregation, offset.
        windowed = f"roll_{self.window_size}{self.window_unit}"
        return self._add_offset_to_formula(f"{windowed}_{self.aggregation}")

    @classmethod
    def from_formula(cls, formula: str) -> Optional["Roll"]:
        """Parse ``roll_<size><unit>_<aggregation>[_offset_<n><unit>]``; return None if it does not match."""
        import re

        pattern = r"roll_(\d+)([a-zA-Z])_(\w+)"

        # The offset suffix (if any) is split off before the window part is parsed.
        offset_kwargs, base_part = cls._parse_offset_from_formula(formula, pattern)
        if base_part is None:
            return None

        parsed = re.match(f"^{pattern}$", base_part)
        if not parsed:
            return None

        size, unit, agg = parsed.groups()
        kwargs = {
            "window_size": int(size),
            "window_unit": unit,
            "aggregation": agg,
        }
        if offset_kwargs:
            kwargs.update(offset_kwargs)

        return cls(**kwargs)

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the window settings and aggregation name."""
        params = super().get_params()
        own = {
            "window_size": self.window_size,
            "window_unit": self.window_unit,
            "aggregation": self.aggregation,
        }
        params.update(own)
        return params

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Aggregate each rolling window with a custom function or a pandas aggregation name."""
        window = f"{self.window_size}{self.window_unit}"
        # Custom aggregations take precedence; unknown names go to pandas as-is.
        agg_func = roll_aggregations.get(self.aggregation, self.aggregation)
        return ts.rolling(window, min_periods=1).agg(agg_func)
@@ -0,0 +1,61 @@
1
+ from typing import Dict, Optional, Union
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+ from upgini.autofe.timeseries.base import TimeSeriesBase
6
+
7
+
8
class TrendCoefficient(TimeSeriesBase):
    """Slope of a first-degree polynomial fitted to the resampled series."""

    name: str = "trend_coef"
    # Resampling step used before fitting the line.
    step_size: int = 1
    step_unit: str = "D"

    def to_formula(self) -> str:
        """Return ``trend_coef`` with the optional offset suffix."""
        base_formula = "trend_coef"
        return self._add_offset_to_formula(base_formula)

    @classmethod
    def from_formula(cls, formula: str) -> Optional["TrendCoefficient"]:
        """Parse ``trend_coef[_offset_<n><unit>]``; return None if it does not match."""
        # Base regex for TrendCoefficient class
        base_regex = r"trend_coef"

        # Parse offset first
        offset_params, remaining_formula = cls._parse_offset_from_formula(formula, base_regex)

        if remaining_formula is None:
            return None

        # Basic pattern (offset already split off, if there was one)
        if remaining_formula == "trend_coef":
            params = {}
            if offset_params:
                params.update(offset_params)
            return cls(**params)

        return None

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the step and offset settings."""
        res = super().get_params()
        res.update(
            {
                "step_size": self.step_size,
                "step_unit": self.step_unit,
                "offset_size": self.offset_size,
                "offset_unit": self.offset_unit,
            }
        )
        return res

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Apply ``_trend_coef`` and keep the last column, replacing NaN with 0."""
        return ts.apply(self._trend_coef).iloc[:, [-1]].fillna(0)

    def _trend_coef(self, x: Union[pd.DataFrame, pd.Series]) -> Union[pd.DataFrame, pd.Series]:
        """Replace the last column of ``x`` with the fitted slope.

        The last column is resampled to ``step_size``/``step_unit``, gaps are
        forward- then backward-filled, and a degree-1 ``np.polyfit`` is run
        against the positional index; the slope is broadcast to every row.

        Returns:
            A Series when ``x`` was a Series, otherwise a DataFrame.
        """
        return_series = isinstance(x, pd.Series)
        # Copy so that writing the slope below cannot modify the caller's data:
        # ``pd.DataFrame(frame)`` may share memory with ``frame``.
        x = pd.DataFrame(x).copy()
        # ``fillna(method=...)`` is deprecated on both Resampler and Series;
        # ``ffill()`` / ``bfill()`` are the direct replacements.
        resampled = x.iloc[:, -1].resample(f"{self.step_size}{self.step_unit}").ffill().bfill()
        idx = np.arange(len(resampled))
        coeffs = np.polyfit(idx, resampled, 1)
        # coeffs[0] is the highest-degree coefficient, i.e. the slope.
        x.iloc[:, -1] = coeffs[0]
        return x.iloc[:, -1] if return_series else x
@@ -0,0 +1,259 @@
1
+ from typing import Dict, Optional, Union
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from upgini.autofe.operator import ParametrizedOperator
6
+ from upgini.autofe.timeseries.base import TimeSeriesBase
7
+
8
+
9
class VolatilityBase(TimeSeriesBase):
    """Shared helper for volatility operators."""

    @staticmethod
    def _get_returns(ts: pd.Series, freq: str) -> pd.Series:
        """Return the percentage change of ``ts`` over ``freq``, with NaN replaced by 0."""
        pct = ts.pct_change(freq=freq)
        return pct.fillna(0)
13
+
14
+
15
class EWMAVolatility(VolatilityBase, ParametrizedOperator):
    """Exponentially weighted standard deviation of returns with span ``window_size``."""

    step_size: int = 1
    step_unit: str = "D"
    window_size: int

    def to_formula(self) -> str:
        """Return ``ewma_vol_<window_size>`` with the optional offset suffix."""
        return self._add_offset_to_formula(f"ewma_vol_{self.window_size}")

    @classmethod
    def from_formula(cls, formula: str) -> Optional["EWMAVolatility"]:
        """Parse ``ewma_vol_<window_size>[_offset_<n><unit>]``; return None if it does not match."""
        pattern = r"ewma_vol_(\d+)"

        offset_kwargs, base_part = cls._parse_offset_from_formula(formula, pattern)
        if base_part is None:
            return None

        import re

        parsed = re.match(f"^{pattern}$", base_part)
        if not parsed:
            return None

        kwargs = {"window_size": int(parsed.group(1))}
        if offset_kwargs:
            kwargs.update(offset_kwargs)

        return cls(**kwargs)

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the step settings and window size."""
        params = super().get_params()
        own = {
            "step_size": self.step_size,
            "step_unit": self.step_unit,
            "window_size": self.window_size,
        }
        params.update(own)
        return params

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Apply ``_ewma_vol`` to ``ts``."""
        return ts.apply(self._ewma_vol)

    def _ewma_vol(self, x):
        """Compute the EWM standard deviation of the returns of the last column of ``x``."""
        values = pd.DataFrame(x).iloc[:, -1]
        step = f"{self.step_size}{self.step_unit}"
        returns = self._get_returns(values, step)
        return returns.ewm(span=self.window_size).std()
69
+
70
+
71
class RollingVolBase(VolatilityBase):
    """Common fields and the rolling standard-deviation helper for rolling volatility operators."""

    # Step over which returns are computed.
    step_size: int = 1
    step_unit: str = "D"
    # Rolling window over which the standard deviation is taken.
    window_size: int
    window_unit: str = "D"

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the step and window settings."""
        res = super().get_params()
        res.update(
            {
                "step_size": self.step_size,
                "step_unit": self.step_unit,
                "window_size": self.window_size,
                "window_unit": self.window_unit,
            }
        )
        return res

    def _rolling_vol(
        self, x: Union[pd.DataFrame, pd.Series], window_size: int, window_unit: str, abs_returns: bool = False
    ) -> Union[pd.DataFrame, pd.Series]:
        """Replace the last column of ``x`` with the rolling std of its returns.

        Args:
            x: Series, or frame whose last column holds the values.
            window_size: Size of the rolling window.
            window_unit: Unit of the rolling window.
            abs_returns: Take absolute returns before the standard deviation.

        Returns:
            A Series when ``x`` was a Series, otherwise a DataFrame. The input
            itself is left untouched.
        """
        return_series = isinstance(x, pd.Series)
        # Work on a copy: ``pd.DataFrame(frame)`` may share data with ``frame``,
        # and callers such as VolatilityRatio pass the same ``x`` twice, so an
        # in-place write here could leak into the second computation.
        x = pd.DataFrame(x).copy()
        returns = self._get_returns(x.iloc[:, -1], f"{self.step_size}{self.step_unit}")
        if abs_returns:
            returns = returns.abs()
        x.iloc[:, -1] = returns.rolling(f"{window_size}{window_unit}", min_periods=1).std()
        return x.iloc[:, -1] if return_series else x
99
+
100
+
101
class RollingVolatility(RollingVolBase, ParametrizedOperator):
    """Rolling standard deviation of returns over ``window_size`` ``window_unit``."""

    abs_returns: bool = False

    def to_formula(self) -> str:
        """Return ``roll_vol_<size><unit>`` with the optional offset suffix."""
        return self._add_offset_to_formula(f"roll_vol_{self.window_size}{self.window_unit}")

    @classmethod
    def from_formula(cls, formula: str) -> Optional["RollingVolatility"]:
        """Parse ``roll_vol_<size><unit>[_offset_<n><unit>]``; return None if it does not match."""
        pattern = r"roll_vol_(\d+)([a-zA-Z])"

        offset_kwargs, base_part = cls._parse_offset_from_formula(formula, pattern)
        if base_part is None:
            return None

        import re

        parsed = re.match(f"^{pattern}$", base_part)
        if not parsed:
            return None

        size, unit = parsed.groups()
        kwargs = {"window_size": int(size), "window_unit": unit}
        if offset_kwargs:
            kwargs.update(offset_kwargs)

        return cls(**kwargs)

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Apply ``_rolling_vol`` with this operator's settings and keep the last column."""
        vol_kwargs = {
            "window_size": self.window_size,
            "window_unit": self.window_unit,
            "abs_returns": self.abs_returns,
        }
        applied = ts.apply(self._rolling_vol, **vol_kwargs)
        return applied.iloc[:, [-1]]
141
+
142
+
143
class RollingVolatility2(RollingVolBase, ParametrizedOperator):
    """
    Volatility of volatility: the rolling volatility helper applied to its own output.
    """

    def to_formula(self) -> str:
        """Return ``roll_vol2_<size><unit>`` with the optional offset suffix."""
        return self._add_offset_to_formula(f"roll_vol2_{self.window_size}{self.window_unit}")

    @classmethod
    def from_formula(cls, formula: str) -> Optional["RollingVolatility2"]:
        """Parse ``roll_vol2_<size><unit>[_offset_<n><unit>]``; return None if it does not match."""
        pattern = r"roll_vol2_(\d+)([a-zA-Z])"

        offset_kwargs, base_part = cls._parse_offset_from_formula(formula, pattern)
        if base_part is None:
            return None

        import re

        parsed = re.match(f"^{pattern}$", base_part)
        if not parsed:
            return None

        size, unit = parsed.groups()
        kwargs = {"window_size": int(size), "window_unit": unit}
        if offset_kwargs:
            kwargs.update(offset_kwargs)

        return cls(**kwargs)

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Apply ``_vol_on_vol`` and keep the last column."""
        applied = ts.apply(self._vol_on_vol)
        return applied.iloc[:, [-1]]

    def _vol_on_vol(self, x: Union[pd.DataFrame, pd.Series]) -> Union[pd.DataFrame, pd.Series]:
        """Compute rolling volatility on absolute returns, then rolling volatility of that result."""
        size, unit = self.window_size, self.window_unit
        inner = self._rolling_vol(x, size, unit, abs_returns=True)
        return self._rolling_vol(inner, size, unit, abs_returns=False)
188
+
189
+
190
class VolatilityRatio(RollingVolBase, ParametrizedOperator):
    """
    Ratio of rolling volatility over the short window to rolling volatility over the long window.
    The long window is ``window_size``/``window_unit``; the short one is given by the fields below.
    """

    short_window_size: int
    short_window_unit: str = "D"

    def to_formula(self) -> str:
        """Return ``vol_ratio_<short>_to_<long>`` with the optional offset suffix."""
        short_part = f"{self.short_window_size}{self.short_window_unit}"
        long_part = f"{self.window_size}{self.window_unit}"
        return self._add_offset_to_formula(f"vol_ratio_{short_part}_to_{long_part}")

    @classmethod
    def from_formula(cls, formula: str) -> Optional["VolatilityRatio"]:
        """Parse ``vol_ratio_<n><unit>_to_<n><unit>[_offset_<n><unit>]``; return None if it does not match."""
        pattern = r"vol_ratio_(\d+)([a-zA-Z])_to_(\d+)([a-zA-Z])"

        offset_kwargs, base_part = cls._parse_offset_from_formula(formula, pattern)
        if base_part is None:
            return None

        import re

        parsed = re.match(f"^{pattern}$", base_part)
        if not parsed:
            return None

        short_size, short_unit, long_size, long_unit = parsed.groups()
        kwargs = {
            "short_window_size": int(short_size),
            "short_window_unit": short_unit,
            "window_size": int(long_size),
            "window_unit": long_unit,
        }
        if offset_kwargs:
            kwargs.update(offset_kwargs)

        return cls(**kwargs)

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters plus the short window settings."""
        params = super().get_params()
        own = {
            "short_window_size": self.short_window_size,
            "short_window_unit": self.short_window_unit,
        }
        params.update(own)
        return params

    def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
        """Apply ``_vol_ratio`` and keep the last column."""
        applied = ts.apply(self._vol_ratio)
        return applied.iloc[:, [-1]]

    def _vol_ratio(self, x: Union[pd.DataFrame, pd.Series]) -> Union[pd.DataFrame, pd.Series]:
        """Divide short-window volatility by long-window volatility and clean up division artefacts."""
        short_vol = self._rolling_vol(x, self.short_window_size, self.short_window_unit)
        long_vol = self._rolling_vol(x, self.window_size, self.window_unit)
        return VolatilityRatio._handle_div_errors(short_vol / long_vol)

    @staticmethod
    def _handle_div_errors(x: pd.Series) -> pd.Series:
        """Turn infinities into NaN, then replace every NaN with 1."""
        without_inf = x.replace([np.inf, -np.inf], np.nan)
        return without_inf.fillna(1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.62a3818.dev2
3
+ Version: 1.2.62a3818.dev4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=OLozvzWRYF8QVe08Gh2xAIzV-SPbWN9X8WcPvXKgTuU,33
1
+ upgini/__about__.py,sha256=DRRGQ9hjWuzUUDq0H9hZpymmoGVeS9BXeeOQ2XoHmjc,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=OGjpeFHbj3lWiZTOHTpWEoMMDmFY1FlNC44FKktoZvU,34956
@@ -14,15 +14,22 @@ upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1
14
14
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
15
15
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
16
16
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- upgini/autofe/all_operands.py,sha256=VIT5jCq5U-qypdNz1MIQ_hlIAs0ujJgRfKRUkU24nFs,332
17
+ upgini/autofe/all_operands.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
18
18
  upgini/autofe/binary.py,sha256=MnQuFiERpocjCPQUjOljlsq5FE-04GPfwtNjzvfNMyU,7671
19
19
  upgini/autofe/date.py,sha256=I07psJerrxOcHao91PdSCk9X6KWu61IBVyFRLjGNgK8,10730
20
20
  upgini/autofe/feature.py,sha256=Xto7FHH1JG-5QvkfTPNWKtV9GAzPviTNPKFZOUN7RQA,14757
21
21
  upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
22
- upgini/autofe/operator.py,sha256=RSJWoKB2pIZ5xToVuk_T0ec7QRx-duxYEEGJ5oealaM,4784
23
- upgini/autofe/timeseries.py,sha256=-BnDp0z_Hv6Vol1Vov6QC_82U8XPV3pfIPFspK2aTCE,6598
22
+ upgini/autofe/operator.py,sha256=KKLFixtEFq-qP6WVks19F0AY2iOnB8_g8uYpbrC8USM,4894
24
23
  upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
25
24
  upgini/autofe/vector.py,sha256=l0KdKg-txlZxDSE4hPPfCtfGQofYbl7oaABPr830sPI,667
25
+ upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
26
+ upgini/autofe/timeseries/base.py,sha256=T9Ec8LKJbiwTUGGsd_xhM0U0NUJblqmKchkzUI1sK88,3755
27
+ upgini/autofe/timeseries/cross.py,sha256=8ggDhsvwdxHkrWKRPl2fcFt7wamTYhkVzQcOWvIIyvU,4612
28
+ upgini/autofe/timeseries/delta.py,sha256=hXEiFWHdZndz8I7Ef5zhTHLJac9illhZOZITwpL9ppw,3618
29
+ upgini/autofe/timeseries/lag.py,sha256=LfQtg484vuqM0mgY4Wft1swHX_Srq7OKKgZswCXoiXI,1882
30
+ upgini/autofe/timeseries/roll.py,sha256=bNFMDszSYTWvB7EyhHbRY1DJqzSURvHlPAcBebt0y0Y,2878
31
+ upgini/autofe/timeseries/trend.py,sha256=eP0q1fBW4MYPrjfy7vr88tTG8qk0xypClaGHaVv1hAs,1962
32
+ upgini/autofe/timeseries/volatility.py,sha256=9shUmIKjpWTHVYjj80YBsk0XheBJ9uBuLv5NW9Mchnk,7953
26
33
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
34
  upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
28
35
  upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
@@ -63,7 +70,7 @@ upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,
63
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
64
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
65
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
66
- upgini-1.2.62a3818.dev2.dist-info/METADATA,sha256=VEJPjgu8A5gOrr4WPbk6DYHt8BNxoqUq9rsl967GQMU,49094
67
- upgini-1.2.62a3818.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
68
- upgini-1.2.62a3818.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
69
- upgini-1.2.62a3818.dev2.dist-info/RECORD,,
73
+ upgini-1.2.62a3818.dev4.dist-info/METADATA,sha256=_sL9eQLnB5X1kyhbUiMzXIB5HUgK0KFfmuwgp3Su59c,49094
74
+ upgini-1.2.62a3818.dev4.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
+ upgini-1.2.62a3818.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.62a3818.dev4.dist-info/RECORD,,
@@ -1,200 +0,0 @@
1
- import abc
2
- from typing import Dict, List, Optional
3
-
4
- import pandas as pd
5
- from upgini.autofe.operator import PandasOperator, ParametrizedOperator
6
-
7
- try:
8
- from pydantic import field_validator as validator # V2
9
- except ImportError:
10
- from pydantic import validator # V1
11
-
12
-
13
- class TimeSeriesBase(PandasOperator, abc.ABC):
14
- is_vector: bool = True
15
- date_unit: Optional[str] = None
16
- offset_size: int = 0
17
- offset_unit: str = "D"
18
-
19
- def get_params(self) -> Dict[str, Optional[str]]:
20
- res = super().get_params()
21
- res.update(
22
- {
23
- "date_unit": self.date_unit,
24
- "offset_size": self.offset_size,
25
- "offset_unit": self.offset_unit,
26
- }
27
- )
28
- return res
29
-
30
- def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
31
- # assuming first is date, last is value, rest is group columns
32
- date = pd.to_datetime(data[0], unit=self.date_unit, errors="coerce")
33
- ts = pd.concat([date] + data[1:], axis=1)
34
- ts.drop_duplicates(subset=ts.columns[:-1], keep="first", inplace=True)
35
- ts.set_index(date.name, inplace=True)
36
- ts = ts[ts.index.notna()].sort_index()
37
- ts = (
38
- ts.groupby([c.name for c in data[1:-1]], group_keys=True)
39
- .apply(self._shift)[data[-1].name]
40
- .to_frame()
41
- .reset_index()
42
- .set_index(date.name)
43
- .groupby([c.name for c in data[1:-1]])
44
- if len(data) > 2
45
- else self._shift(ts)
46
- )
47
- ts = self._aggregate(ts)
48
- ts = ts.reindex(data[1:-1] + [date] if len(data) > 2 else date).reset_index()
49
- ts.index = date.index
50
-
51
- return ts.iloc[:, -1]
52
-
53
- def _shift(self, ts: pd.DataFrame) -> pd.DataFrame:
54
- if self.offset_size > 0:
55
- return ts.iloc[:, :-1].merge(
56
- ts.iloc[:, -1].shift(freq=f"{self.offset_size}{self.offset_unit}"),
57
- left_index=True,
58
- right_index=True,
59
- )
60
- return ts
61
-
62
- @abc.abstractmethod
63
- def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
64
- pass
65
-
66
-
67
- _roll_aggregations = {"norm_mean": lambda x: x[-1] / x.mean(), "last": lambda x: x[-1]}
68
-
69
-
70
- class Roll(TimeSeriesBase, ParametrizedOperator):
71
- aggregation: str
72
- window_size: int = 1
73
- window_unit: str = "D"
74
-
75
- @validator("window_unit")
76
- @classmethod
77
- def validate_window_unit(cls, v: str) -> str:
78
- try:
79
- pd.tseries.frequencies.to_offset(v)
80
- return v
81
- except ValueError:
82
- raise ValueError(
83
- f"Invalid window_unit: {v}. Must be a valid pandas frequency string (e.g. 'D', 'H', 'T', etc)"
84
- )
85
-
86
- def to_formula(self) -> str:
87
- roll_component = f"roll_{self.window_size}{self.window_unit}"
88
- if self.offset_size > 0:
89
- roll_component += f"_offset_{self.offset_size}{self.offset_unit}"
90
- return f"{roll_component}_{self.aggregation}"
91
-
92
- @classmethod
93
- def from_formula(cls, formula: str) -> Optional["Roll"]:
94
- import re
95
-
96
- # Try matching pattern with offset first
97
- pattern_with_offset = r"^roll_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])_(\w+)$"
98
- match_with_offset = re.match(pattern_with_offset, formula)
99
-
100
- if match_with_offset:
101
- window_size = int(match_with_offset.group(1))
102
- window_unit = match_with_offset.group(2)
103
- offset_size = int(match_with_offset.group(3))
104
- offset_unit = match_with_offset.group(4)
105
- aggregation = match_with_offset.group(5)
106
-
107
- return cls(
108
- window_size=window_size,
109
- window_unit=window_unit,
110
- offset_size=offset_size,
111
- offset_unit=offset_unit,
112
- aggregation=aggregation,
113
- )
114
-
115
- # If no offset pattern found, try basic pattern
116
- pattern = r"^roll_(\d+)([a-zA-Z])_(\w+)$"
117
- match = re.match(pattern, formula)
118
-
119
- if not match:
120
- return None
121
-
122
- window_size = int(match.group(1))
123
- window_unit = match.group(2)
124
- aggregation = match.group(3)
125
-
126
- return cls(window_size=window_size, window_unit=window_unit, aggregation=aggregation)
127
-
128
- def get_params(self) -> Dict[str, Optional[str]]:
129
- res = super().get_params()
130
- res.update(
131
- {
132
- "window_size": self.window_size,
133
- "window_unit": self.window_unit,
134
- "aggregation": self.aggregation,
135
- }
136
- )
137
- return res
138
-
139
- def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
140
- return ts.rolling(f"{self.window_size}{self.window_unit}", min_periods=1).agg(
141
- _roll_aggregations.get(self.aggregation, self.aggregation)
142
- )
143
-
144
-
145
- class Lag(TimeSeriesBase, ParametrizedOperator):
146
- lag_size: int
147
- lag_unit: str = "D"
148
-
149
- def to_formula(self) -> str:
150
- lag_component = f"lag_{self.lag_size}{self.lag_unit}"
151
- if self.offset_size > 0:
152
- lag_component += f"_offset_{self.offset_size}{self.offset_unit}"
153
- return lag_component
154
-
155
- @classmethod
156
- def from_formula(cls, formula: str) -> Optional["Lag"]:
157
- import re
158
-
159
- # Try matching pattern with offset first
160
- pattern_with_offset = r"^lag_(\d+)([a-zA-Z])_offset_(\d+)([a-zA-Z])$"
161
- match_with_offset = re.match(pattern_with_offset, formula)
162
-
163
- if match_with_offset:
164
- lag_size = int(match_with_offset.group(1))
165
- lag_unit = match_with_offset.group(2)
166
- offset_size = int(match_with_offset.group(3))
167
- offset_unit = match_with_offset.group(4)
168
-
169
- return cls(
170
- lag_size=lag_size,
171
- lag_unit=lag_unit,
172
- offset_size=offset_size,
173
- offset_unit=offset_unit,
174
- )
175
-
176
- # If no offset pattern found, try basic pattern
177
- pattern = r"^lag_(\d+)([a-zA-Z])$"
178
- match = re.match(pattern, formula)
179
-
180
- if not match:
181
- return None
182
-
183
- lag_size = int(match.group(1))
184
- lag_unit = match.group(2)
185
-
186
- return cls(lag_size=lag_size, lag_unit=lag_unit)
187
-
188
- def get_params(self) -> Dict[str, Optional[str]]:
189
- res = super().get_params()
190
- res.update(
191
- {
192
- "lag_size": self.lag_size,
193
- "lag_unit": self.lag_unit,
194
- }
195
- )
196
- return res
197
-
198
- def _aggregate(self, ts: pd.DataFrame) -> pd.DataFrame:
199
- lag_window = self.lag_size + 1
200
- return ts.rolling(f"{lag_window}{self.lag_unit}", min_periods=lag_window).agg(lambda x: x[0])