upgini 1.1.280.dev0__py3-none-any.whl → 1.2.31a1__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (43)
  1. upgini/__about__.py +1 -1
  2. upgini/__init__.py +4 -20
  3. upgini/autofe/all_operands.py +39 -9
  4. upgini/autofe/binary.py +148 -45
  5. upgini/autofe/date.py +197 -26
  6. upgini/autofe/feature.py +102 -19
  7. upgini/autofe/groupby.py +22 -22
  8. upgini/autofe/operand.py +9 -6
  9. upgini/autofe/unary.py +83 -41
  10. upgini/autofe/vector.py +8 -8
  11. upgini/data_source/data_source_publisher.py +128 -5
  12. upgini/dataset.py +50 -386
  13. upgini/features_enricher.py +931 -542
  14. upgini/http.py +27 -16
  15. upgini/lazy_import.py +35 -0
  16. upgini/metadata.py +84 -59
  17. upgini/metrics.py +164 -34
  18. upgini/normalizer/normalize_utils.py +197 -0
  19. upgini/resource_bundle/strings.properties +66 -51
  20. upgini/search_task.py +10 -4
  21. upgini/utils/Roboto-Regular.ttf +0 -0
  22. upgini/utils/base_search_key_detector.py +14 -12
  23. upgini/utils/country_utils.py +16 -0
  24. upgini/utils/custom_loss_utils.py +39 -36
  25. upgini/utils/datetime_utils.py +98 -45
  26. upgini/utils/deduplicate_utils.py +135 -112
  27. upgini/utils/display_utils.py +46 -15
  28. upgini/utils/email_utils.py +54 -16
  29. upgini/utils/feature_info.py +172 -0
  30. upgini/utils/features_validator.py +34 -20
  31. upgini/utils/ip_utils.py +100 -1
  32. upgini/utils/phone_utils.py +343 -0
  33. upgini/utils/postal_code_utils.py +34 -0
  34. upgini/utils/sklearn_ext.py +28 -19
  35. upgini/utils/target_utils.py +113 -57
  36. upgini/utils/warning_counter.py +1 -0
  37. upgini/version_validator.py +8 -4
  38. {upgini-1.1.280.dev0.dist-info → upgini-1.2.31a1.dist-info}/METADATA +31 -16
  39. upgini-1.2.31a1.dist-info/RECORD +65 -0
  40. upgini/normalizer/phone_normalizer.py +0 -340
  41. upgini-1.1.280.dev0.dist-info/RECORD +0 -62
  42. {upgini-1.1.280.dev0.dist-info → upgini-1.2.31a1.dist-info}/WHEEL +0 -0
  43. {upgini-1.1.280.dev0.dist-info → upgini-1.2.31a1.dist-info}/licenses/LICENSE +0 -0
upgini/autofe/feature.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import hashlib
2
2
  import itertools
3
- from typing import Dict, List, Optional, Tuple, Union
3
+ from typing import Dict, List, Optional, Set, Tuple, Union
4
4
 
5
5
  import numpy as np
6
6
  import pandas as pd
@@ -16,6 +16,15 @@ class Column:
16
16
  self.data = data
17
17
  self.calculate_all = calculate_all
18
18
 
19
+ def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
20
+ return self.name
21
+
22
+ def set_op_params(self, params: Dict[str, str]) -> "Column":
23
+ return self
24
+
25
+ def get_op_params(self, **kwargs):
26
+ return dict()
27
+
19
28
  def rename_columns(self, mapping: Dict[str, str]) -> "Column":
20
29
  self.name = self._unhash(mapping.get(self.name) or self.name)
21
30
  return self
@@ -35,9 +44,13 @@ class Column:
35
44
  def get_column_nodes(self) -> List["Column"]:
36
45
  return [self]
37
46
 
38
- def get_columns(self) -> List[str]:
47
+ def get_columns(self, **kwargs) -> List[str]:
39
48
  return [self.name]
40
49
 
50
+ @property
51
+ def children(self) -> List[Union["Feature", "Column"]]:
52
+ return []
53
+
41
54
  def infer_type(self, data: pd.DataFrame) -> DtypeObj:
42
55
  return data[self.name].dtype
43
56
 
@@ -51,6 +64,12 @@ class Column:
51
64
  def to_pretty_formula(self) -> str:
52
65
  return self.to_formula()
53
66
 
67
+ def __eq__(self, value: object) -> bool:
68
+ if not isinstance(value, Column):
69
+ return False
70
+ else:
71
+ return self.name == value.name and self.calculate_all == value.calculate_all
72
+
54
73
 
55
74
  class Feature:
56
75
  def __init__(
@@ -69,19 +88,51 @@ class Feature:
69
88
  self.cached_display_name = cached_display_name
70
89
  self.alias = alias
71
90
 
72
- def set_op_params(self, params: Dict[str, str]) -> "Feature":
91
+ def set_op_params(self, params: Optional[Dict[str, str]]) -> "Feature":
92
+ obj_dict = self.op.dict().copy()
93
+ obj_dict.update(params or {})
94
+ self.op = self.op.__class__.parse_obj(obj_dict)
73
95
  self.op.set_params(params)
96
+
97
+ for child in self.children:
98
+ child_params = {
99
+ k[len(child.get_display_name()) + 1 :]: v
100
+ for k, v in params.items()
101
+ if k.startswith(child.get_display_name())
102
+ }
103
+ if not child_params:
104
+ child_params = params
105
+ child.set_op_params(child_params)
74
106
  return self
75
107
 
108
+ def get_op_params(self, **kwargs) -> Dict[str, str]:
109
+ return {
110
+ k: str(v)
111
+ for k, v in dict(
112
+ (
113
+ (f"{child.get_display_name(**kwargs)}_{k}", v)
114
+ for child in self.children
115
+ for k, v in child.get_op_params(**kwargs).items()
116
+ ),
117
+ **(self.op.get_params() or {}),
118
+ ).items()
119
+ if v is not None
120
+ }
121
+
76
122
  def get_hash(self) -> str:
77
- return hashlib.sha256("_".join([self.op.name] + [ch.name for ch in self.children]).encode("utf-8")).hexdigest()[
78
- :8
79
- ]
123
+ return hashlib.sha256(
124
+ "_".join([self.op.name] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
125
+ ).hexdigest()[:8]
80
126
 
81
127
  def set_alias(self, alias: str) -> "Feature":
82
128
  self.alias = alias
83
129
  return self
84
130
 
131
+ def get_all_operand_names(self) -> Set[str]:
132
+ return {self.op.name}.union(
133
+ {n for f in self.children if isinstance(f, Feature) for n in f.get_all_operand_names()}
134
+ )
135
+
85
136
  def rename_columns(self, mapping: Dict[str, str]) -> "Feature":
86
137
  for child in self.children:
87
138
  child.rename_columns(mapping)
@@ -108,19 +159,24 @@ class Feature:
108
159
  for child in self.children:
109
160
  child.delete_data()
110
161
 
162
+ def get_op_display_name(self) -> str:
163
+ return self.op.alias or self.op.name.lower()
164
+
111
165
  def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
112
166
  if self.cached_display_name is not None and cache:
113
167
  return self.cached_display_name
114
168
 
169
+ should_stack_op = not isinstance(self.children[0], Column) if self.op.is_unary else False
170
+ prev_name = [self.children[0].get_op_display_name()] if should_stack_op else []
171
+
115
172
  if self.alias:
116
173
  components = ["f_autofe", self.alias]
117
- elif shorten and not self.op.is_unary:
118
- components = ["f_autofe", self.op.alias or self.op.name.lower()]
174
+ elif shorten and (not self.op.is_unary or should_stack_op):
175
+ components = ["f_autofe"] + prev_name + [self.get_op_display_name()]
119
176
  else:
120
- components = ["f_" + "_f_".join(self.get_columns(**kwargs))] + [
121
- "autofe",
122
- self.op.alias or self.op.name.lower(),
123
- ]
177
+ components = (
178
+ ["f_" + "_f_".join(self.get_columns(**kwargs))] + ["autofe"] + prev_name + [self.get_op_display_name()]
179
+ )
124
180
  components.extend([str(self.display_index)] if self.display_index is not None else [])
125
181
  display_name = "_".join(components)
126
182
 
@@ -211,12 +267,19 @@ class Feature:
211
267
 
212
268
  @staticmethod
213
269
  def from_formula(string: str) -> Union[Column, "Feature"]:
214
- if string[-1] != ")":
215
- return Column(string)
216
270
 
217
271
  def is_trivial_char(c: str) -> bool:
218
272
  return c not in "()+-*/,"
219
273
 
274
+ if string[-1] != ")":
275
+ if all(is_trivial_char(c) for c in string):
276
+ return Column(string)
277
+ else:
278
+ raise ValueError(
279
+ f"Unsupported column name: {string}. Column names should not have characters: "
280
+ "['(', ')', '+', '-', '*', '/', ',']"
281
+ )
282
+
220
283
  def find_prev(string: str) -> int:
221
284
  if string[-1] != ")":
222
285
  return max([(0 if is_trivial_char(c) else i + 1) for i, c in enumerate(string)])
@@ -238,8 +301,11 @@ class Feature:
238
301
  return Feature(find_op(string[: p2 - 1]), [Feature.from_formula(string[p2:-1])])
239
302
  p1 = find_prev(string[: p2 - 1])
240
303
  if string[0] == "(":
304
+ op = find_op(string[p2 - 1])
305
+ if op is None:
306
+ raise ValueError(f"Unsupported operand: {string[p2 - 1]}")
241
307
  return Feature(
242
- find_op(string[p2 - 1]),
308
+ op,
243
309
  [Feature.from_formula(string[p1 : p2 - 1]), Feature.from_formula(string[p2:-1])],
244
310
  )
245
311
  else:
@@ -250,6 +316,8 @@ class Feature:
250
316
  [Feature.from_formula(string[p1 : p2 - 1]), Feature.from_formula(string[p2:-1])],
251
317
  )
252
318
  else:
319
+ if string[p1 - 1] == "(":
320
+ raise ValueError(f"Unsupported operand: {string[: p1 - 1]}")
253
321
  base_features = [
254
322
  Feature.from_formula(string[p2:-1]),
255
323
  Feature.from_formula(string[p1 : p2 - 1]),
@@ -286,11 +354,26 @@ class FeatureGroup:
286
354
  return names
287
355
 
288
356
  def calculate(self, data: pd.DataFrame, is_root=False) -> pd.DataFrame:
289
- main_column = None if self.main_column_node is None else self.main_column_node.get_columns()[0]
290
357
  if isinstance(self.op, PandasOperand):
291
- columns = self.get_columns()
292
- new_data = self.op.calculate_group(data[columns], main_column=main_column)
293
- new_data.rename(columns=dict(zip(columns, self.get_display_names())), inplace=True)
358
+ main_column = None if self.main_column_node is None else self.main_column_node.get_display_name()
359
+ lower_order_children = []
360
+ if self.main_column_node is not None:
361
+ lower_order_children.append(self.main_column_node)
362
+ lower_order_children.extend(
363
+ ch for f in self.children for ch in f.children if ch.get_display_name() != main_column
364
+ )
365
+ lower_order_names = [ch.get_display_name() for ch in lower_order_children]
366
+ child_data = pd.concat(
367
+ [ch.calculate(data) for ch in lower_order_children],
368
+ axis=1,
369
+ )
370
+ child_data.columns = lower_order_names
371
+
372
+ new_data = self.op.calculate_group(child_data, main_column=main_column)
373
+ new_data.rename(
374
+ columns=dict(zip((n for n in lower_order_names if n != main_column), self.get_display_names())),
375
+ inplace=True,
376
+ )
294
377
  else:
295
378
  raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
296
379
 
upgini/autofe/groupby.py CHANGED
@@ -7,9 +7,9 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
7
7
 
8
8
  class GroupByThenAgg(PandasOperand, VectorizableMixin):
9
9
  agg: Optional[str]
10
- is_vectorizable = True
11
- is_grouping = True
12
- is_distribution_dependent = True
10
+ is_vectorizable: bool = True
11
+ is_grouping: bool = True
12
+ is_distribution_dependent: bool = True
13
13
 
14
14
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
15
15
  temp = left.groupby(right).agg(self.agg)
@@ -24,17 +24,17 @@ class GroupByThenAgg(PandasOperand, VectorizableMixin):
24
24
 
25
25
 
26
26
  class GroupByThenMedian(GroupByThenAgg):
27
- name = "GroupByThenMedian"
28
- pandas_agg = "median"
29
- is_distribution_dependent = True
27
+ name: str = "GroupByThenMedian"
28
+ pandas_agg: str = "median"
29
+ is_distribution_dependent: bool = True
30
30
 
31
31
 
32
32
  class GroupByThenRank(PandasOperand, VectorizableMixin):
33
- name = "GroupByThenRank"
34
- is_vectorizable = True
35
- is_grouping = True
36
- output_type = "float"
37
- is_distribution_dependent = True
33
+ name: str = "GroupByThenRank"
34
+ is_vectorizable: bool = True
35
+ is_grouping: bool = True
36
+ output_type: Optional[str] = "float"
37
+ is_distribution_dependent: bool = True
38
38
 
39
39
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
40
40
  temp = pd.DataFrame(left[~right.isna()].groupby(right).rank(ascending=True, pct=True)).reset_index()
@@ -49,12 +49,12 @@ class GroupByThenRank(PandasOperand, VectorizableMixin):
49
49
 
50
50
 
51
51
  class GroupByThenNUnique(PandasOperand, VectorizableMixin):
52
- name = "GroupByThenNUnique"
53
- is_vectorizable = True
54
- is_grouping = True
55
- output_type = "int"
56
- is_distribution_dependent = True
57
- input_type = "discrete"
52
+ name: str = "GroupByThenNUnique"
53
+ is_vectorizable: bool = True
54
+ is_grouping: bool = True
55
+ output_type: Optional[str] = "int"
56
+ is_distribution_dependent: bool = True
57
+ input_type: Optional[str] = "discrete"
58
58
 
59
59
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
60
60
  nunique = left.groupby(right).nunique()
@@ -69,11 +69,11 @@ class GroupByThenNUnique(PandasOperand, VectorizableMixin):
69
69
 
70
70
 
71
71
  class GroupByThenFreq(PandasOperand):
72
- name = "GroupByThenFreq"
73
- is_grouping = True
74
- output_type = "float"
75
- is_distribution_dependent = True
76
- input_type = "discrete"
72
+ name: str = "GroupByThenFreq"
73
+ is_grouping: bool = True
74
+ output_type: Optional[str] = "float"
75
+ is_distribution_dependent: bool = True
76
+ input_type: Optional[str] = "discrete"
77
77
 
78
78
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
79
79
  def _f(x):
upgini/autofe/operand.py CHANGED
@@ -8,25 +8,28 @@ from pydantic import BaseModel
8
8
 
9
9
  class Operand(BaseModel):
10
10
  name: str
11
- alias: Optional[str]
11
+ alias: Optional[str] = None
12
12
  is_unary: bool = False
13
+ is_symmetrical: bool = False
13
14
  has_symmetry_importance: bool = False
14
- input_type: Optional[str]
15
- output_type: Optional[str]
15
+ input_type: Optional[str] = None
16
+ output_type: Optional[str] = None
16
17
  is_categorical: bool = False
17
18
  is_vectorizable: bool = False
18
19
  is_grouping: bool = False
19
20
  is_binary: bool = False
20
21
  is_vector: bool = False
21
22
  is_distribution_dependent: bool = False
22
- params: Optional[Dict[str, str]]
23
+ params: Optional[Dict[str, str]] = None
23
24
 
24
25
  def set_params(self, params: Dict[str, str]):
25
26
  self.params = params
26
27
  return self
27
28
 
28
- def get_params(self) -> Dict[str, str]:
29
- return self.params
29
+ def get_params(self) -> Dict[str, Optional[str]]:
30
+ res = {"alias": self.alias}
31
+ res.update(self.params or {})
32
+ return res
30
33
 
31
34
 
32
35
  MAIN_COLUMN = "main_column"
upgini/autofe/unary.py CHANGED
@@ -1,3 +1,4 @@
1
+ from typing import Dict, Optional
1
2
  import numpy as np
2
3
  import pandas as pd
3
4
 
@@ -5,24 +6,26 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
5
6
 
6
7
 
7
8
  class Abs(PandasOperand, VectorizableMixin):
8
- name = "abs"
9
- is_unary = True
10
- is_vectorizable = True
11
- group_index = 0
9
+ name: str = "abs"
10
+ is_unary: bool = True
11
+ is_vectorizable: bool = True
12
+ group_index: int = 0
12
13
 
13
14
  def calculate_unary(self, data: pd.Series) -> pd.Series:
14
- return data.abs()
15
+ return data.astype(np.float64).abs()
16
+ # return data.abs()
15
17
 
16
18
  def calculate_group(self, data: pd.DataFrame, **kwargs) -> pd.DataFrame:
17
- return data.abs()
19
+ return data.astype(np.float64).abs()
20
+ # return data.abs()
18
21
 
19
22
 
20
23
  class Log(PandasOperand, VectorizableMixin):
21
- name = "log"
22
- is_unary = True
23
- is_vectorizable = True
24
- output_type = "float"
25
- group_index = 0
24
+ name: str = "log"
25
+ is_unary: bool = True
26
+ is_vectorizable: bool = True
27
+ output_type: Optional[str] = "float"
28
+ group_index: int = 0
26
29
 
27
30
  def calculate_unary(self, data: pd.Series) -> pd.Series:
28
31
  return self._round_value(np.log(np.abs(data.replace(0, np.nan))), 10)
@@ -32,11 +35,11 @@ class Log(PandasOperand, VectorizableMixin):
32
35
 
33
36
 
34
37
  class Sqrt(PandasOperand, VectorizableMixin):
35
- name = "sqrt"
36
- is_unary = True
37
- is_vectorizable = True
38
- output_type = "float"
39
- group_index = 0
38
+ name: str = "sqrt"
39
+ is_unary: bool = True
40
+ is_vectorizable: bool = True
41
+ output_type: Optional[str] = "float"
42
+ group_index: int = 0
40
43
 
41
44
  def calculate_unary(self, data: pd.Series) -> pd.Series:
42
45
  return self._round_value(np.sqrt(np.abs(data)))
@@ -46,10 +49,10 @@ class Sqrt(PandasOperand, VectorizableMixin):
46
49
 
47
50
 
48
51
  class Square(PandasOperand, VectorizableMixin):
49
- name = "square"
50
- is_unary = True
51
- is_vectorizable = True
52
- group_index = 0
52
+ name: str = "square"
53
+ is_unary: bool = True
54
+ is_vectorizable: bool = True
55
+ group_index: int = 0
53
56
 
54
57
  def calculate_unary(self, data: pd.Series) -> pd.Series:
55
58
  return np.square(data)
@@ -59,11 +62,11 @@ class Square(PandasOperand, VectorizableMixin):
59
62
 
60
63
 
61
64
  class Sigmoid(PandasOperand, VectorizableMixin):
62
- name = "sigmoid"
63
- is_unary = True
64
- is_vectorizable = True
65
- output_type = "float"
66
- group_index = 0
65
+ name: str = "sigmoid"
66
+ is_unary: bool = True
67
+ is_vectorizable: bool = True
68
+ output_type: Optional[str] = "float"
69
+ group_index: int = 0
67
70
 
68
71
  def calculate_unary(self, data: pd.Series) -> pd.Series:
69
72
  return self._round_value(1 / (1 + np.exp(-data)))
@@ -73,12 +76,12 @@ class Sigmoid(PandasOperand, VectorizableMixin):
73
76
 
74
77
 
75
78
  class Floor(PandasOperand, VectorizableMixin):
76
- name = "floor"
77
- is_unary = True
78
- is_vectorizable = True
79
- output_type = "int"
80
- input_type = "continuous"
81
- group_index = 0
79
+ name: str = "floor"
80
+ is_unary: bool = True
81
+ is_vectorizable: bool = True
82
+ output_type: Optional[str] = "int"
83
+ input_type: Optional[str] = "continuous"
84
+ group_index: int = 0
82
85
 
83
86
  def calculate_unary(self, data: pd.Series) -> pd.Series:
84
87
  return np.floor(data)
@@ -88,11 +91,11 @@ class Floor(PandasOperand, VectorizableMixin):
88
91
 
89
92
 
90
93
  class Residual(PandasOperand, VectorizableMixin):
91
- name = "residual"
92
- is_unary = True
93
- is_vectorizable = True
94
- input_type = "continuous"
95
- group_index = 0
94
+ name: str = "residual"
95
+ is_unary: bool = True
96
+ is_vectorizable: bool = True
97
+ input_type: Optional[str] = "continuous"
98
+ group_index: int = 0
96
99
 
97
100
  def calculate_unary(self, data: pd.Series) -> pd.Series:
98
101
  return data - np.floor(data)
@@ -102,12 +105,51 @@ class Residual(PandasOperand, VectorizableMixin):
102
105
 
103
106
 
104
107
  class Freq(PandasOperand):
105
- name = "freq"
106
- is_unary = True
107
- output_type = "float"
108
- is_distribution_dependent = True
109
- input_type = "discrete"
108
+ name: str = "freq"
109
+ is_unary: bool = True
110
+ output_type: Optional[str] = "float"
111
+ is_distribution_dependent: bool = True
112
+ input_type: Optional[str] = "discrete"
110
113
 
111
114
  def calculate_unary(self, data: pd.Series) -> pd.Series:
112
115
  value_counts = data.value_counts(normalize=True)
113
116
  return self._loc(data, value_counts)
117
+
118
+
119
+ class Norm(PandasOperand):
120
+ name: str = "norm"
121
+ is_unary: bool = True
122
+ output_type: Optional[str] = "float"
123
+ norm: Optional[float] = None
124
+
125
+ def calculate_unary(self, data: pd.Series) -> pd.Series:
126
+ data_dropna = data.dropna()
127
+ if data_dropna.empty:
128
+ return data
129
+
130
+ if self.norm is not None:
131
+ normalized_data = data / self.norm
132
+ else:
133
+ self.norm = np.sqrt(np.sum(data * data))
134
+ normalized_data = data / self.norm
135
+
136
+ return normalized_data
137
+
138
+ def set_params(self, params: Dict[str, str]):
139
+ super().set_params(params)
140
+ if params is not None and "norm" in params:
141
+ self.norm = float(params["norm"])
142
+ return self
143
+
144
+ def get_params(self) -> Dict[str, Optional[str]]:
145
+ res = super().get_params()
146
+ if self.norm is not None:
147
+ res["norm"] = self.norm
148
+ return res
149
+
150
+
151
+ class Embeddings(PandasOperand):
152
+ name: str = "emb"
153
+ is_unary: bool = True
154
+ input_type: Optional[str] = "string"
155
+ output_type: Optional[str] = "vector"
upgini/autofe/vector.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Optional
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -6,19 +6,19 @@ from upgini.autofe.operand import PandasOperand, VectorizableMixin
6
6
 
7
7
 
8
8
  class Mean(PandasOperand, VectorizableMixin):
9
- name = "mean"
10
- output_type = "float"
11
- is_vector = True
12
- group_index = 0
9
+ name: str = "mean"
10
+ output_type: Optional[str] = "float"
11
+ is_vector: bool = True
12
+ group_index: int = 0
13
13
 
14
14
  def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
15
15
  return pd.DataFrame(data).T.fillna(0).mean(axis=1)
16
16
 
17
17
 
18
18
  class Sum(PandasOperand, VectorizableMixin):
19
- name = "sum"
20
- is_vector = True
21
- group_index = 0
19
+ name: str = "sum"
20
+ is_vector: bool = True
21
+ group_index: int = 0
22
22
 
23
23
  def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
24
24
  return pd.DataFrame(data).T.fillna(0).sum(axis=1)