vtlengine 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vtlengine might be problematic.

Files changed (54)
  1. vtlengine/API/_InternalApi.py +153 -100
  2. vtlengine/API/__init__.py +109 -67
  3. vtlengine/AST/ASTConstructor.py +188 -98
  4. vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
  7. vtlengine/AST/ASTEncoders.py +1 -1
  8. vtlengine/AST/ASTTemplate.py +8 -9
  9. vtlengine/AST/ASTVisitor.py +8 -12
  10. vtlengine/AST/DAG/__init__.py +43 -35
  11. vtlengine/AST/DAG/_words.py +4 -4
  12. vtlengine/AST/Grammar/lexer.py +732 -142
  13. vtlengine/AST/Grammar/parser.py +2188 -826
  14. vtlengine/AST/Grammar/tokens.py +128 -128
  15. vtlengine/AST/VtlVisitor.py +7 -4
  16. vtlengine/AST/__init__.py +22 -11
  17. vtlengine/DataTypes/NumericTypesHandling.py +5 -4
  18. vtlengine/DataTypes/TimeHandling.py +194 -301
  19. vtlengine/DataTypes/__init__.py +304 -218
  20. vtlengine/Exceptions/__init__.py +52 -27
  21. vtlengine/Exceptions/messages.py +134 -62
  22. vtlengine/Interpreter/__init__.py +781 -487
  23. vtlengine/Model/__init__.py +165 -121
  24. vtlengine/Operators/Aggregation.py +156 -95
  25. vtlengine/Operators/Analytic.py +115 -59
  26. vtlengine/Operators/Assignment.py +7 -4
  27. vtlengine/Operators/Boolean.py +27 -32
  28. vtlengine/Operators/CastOperator.py +177 -131
  29. vtlengine/Operators/Clause.py +137 -99
  30. vtlengine/Operators/Comparison.py +148 -117
  31. vtlengine/Operators/Conditional.py +149 -98
  32. vtlengine/Operators/General.py +68 -47
  33. vtlengine/Operators/HROperators.py +91 -72
  34. vtlengine/Operators/Join.py +217 -118
  35. vtlengine/Operators/Numeric.py +89 -44
  36. vtlengine/Operators/RoleSetter.py +16 -15
  37. vtlengine/Operators/Set.py +61 -36
  38. vtlengine/Operators/String.py +213 -139
  39. vtlengine/Operators/Time.py +334 -216
  40. vtlengine/Operators/Validation.py +117 -76
  41. vtlengine/Operators/__init__.py +340 -213
  42. vtlengine/Utils/__init__.py +195 -40
  43. vtlengine/__init__.py +1 -1
  44. vtlengine/files/output/__init__.py +15 -6
  45. vtlengine/files/output/_time_period_representation.py +10 -9
  46. vtlengine/files/parser/__init__.py +77 -52
  47. vtlengine/files/parser/_rfc_dialect.py +6 -5
  48. vtlengine/files/parser/_time_checking.py +46 -37
  49. vtlengine-1.0.1.dist-info/METADATA +236 -0
  50. vtlengine-1.0.1.dist-info/RECORD +58 -0
  51. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
  52. vtlengine-1.0.dist-info/METADATA +0 -104
  53. vtlengine-1.0.dist-info/RECORD +0 -58
  54. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
--- vtlengine/Operators/Join.py (1.0)
+++ vtlengine/Operators/Join.py (1.0.1)
@@ -1,36 +1,40 @@
-import os
 from copy import copy
 from functools import reduce
-from typing import List, Dict
+from typing import List, Dict, Any, Optional

 from vtlengine.DataTypes import binary_implicit_promotion

 from vtlengine.AST import BinOp
 from vtlengine.Exceptions import SemanticError

-if os.environ.get("SPARK"):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK"):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd

 from vtlengine.Model import Dataset, Component, Role
 from vtlengine.Operators import Operator, _id_type_promotion_join_keys


 class Join(Operator):
-    how = None
-    reference_dataset = None
+    how: str
+    reference_dataset: Dataset

     @classmethod
     def get_components_union(cls, datasets: List[Dataset]) -> List[Component]:
-        common = []
-        common.extend(copy(comp) for dataset in datasets for comp in dataset.components.values() if
-                      comp not in common)
+        common: List[Any] = []
+        common.extend(
+            copy(comp)
+            for dataset in datasets
+            for comp in dataset.components.values()
+            if comp not in common
+        )
         return common

     @classmethod
-    def get_components_intersection(cls, *operands: List[Component]):
-        element_count = {}
+    def get_components_intersection(cls, operands: List[Any]) -> Any:
+        element_count: Dict[str, Any] = {}
         for operand in operands:
             operand_set = set(operand)
             for element in operand_set:
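
Note: the visible API change in this hunk is `get_components_intersection` dropping its variadic `*operands` signature for a single list parameter, so every call site now passes one list of lists instead of unpacking with `*`. The counting body falls between hunks and is not shown, so the sketch below only illustrates the calling convention; the shared-name count is a stand-in, not the engine's actual logic:

from typing import Dict, List

def components_intersection(operands: List[List[str]]) -> List[str]:
    # 1.0.1 convention: one argument holding every operand's component names.
    # Counting logic is illustrative; the real body is elided between hunks.
    element_count: Dict[str, int] = {}
    for operand in operands:
        for element in set(operand):
            element_count[element] = element_count.get(element, 0) + 1
    return [name for name, n in element_count.items() if n > 1]

names = [["Id_1", "Me_1"], ["Id_1", "Me_2"]]
# 1.0 call site:   cls.get_components_intersection(*names)
# 1.0.1 call site: cls.get_components_intersection(names)
assert components_intersection(names) == ["Id_1"]
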
@@ -42,29 +46,44 @@ class Join(Operator):
         return result

     @classmethod
-    def merge_components(cls, operands, using=None):
+    def merge_components(
+        cls, operands: Any, using: Optional[List[str]] = None
+    ) -> Dict[str, Component]:
         nullability = {}
         merged_components = {}
         using = using or []
-        common = cls.get_components_intersection(*[op.get_components_names() for op in operands])
-        totally_common = list(reduce(lambda x, y: x & set(y.get_components_names()), operands[1:],
-                                     set(operands[0].get_components_names())))
+        common = cls.get_components_intersection([op.get_components_names() for op in operands])
+        totally_common = list(
+            reduce(
+                lambda x, y: x & set(y.get_components_names()),  # type: ignore[operator]
+                operands[1:],
+                set(operands[0].get_components_names()),
+            )
+        )

         for op in operands:
             for comp in op.components.values():
                 if comp.name in using:
-                    is_identifier = all(operand.components[comp.name].role == Role.IDENTIFIER
-                                        for operand in operands if
-                                        comp.name in operand.get_components_names())
-                    comp.role = Role.IDENTIFIER if is_identifier else Role.MEASURE if comp.role == Role.IDENTIFIER else comp.role
+                    is_identifier = all(
+                        operand.components[comp.name].role == Role.IDENTIFIER
+                        for operand in operands
+                        if comp.name in operand.get_components_names()
+                    )
+                    comp.role = (
+                        Role.IDENTIFIER
+                        if is_identifier
+                        else Role.MEASURE if comp.role == Role.IDENTIFIER else comp.role
+                    )
                 if comp.name not in nullability:
                     nullability[comp.name] = copy(comp.nullable)
                 if comp.role == Role.IDENTIFIER:
                     nullability[comp.name] = False
                 elif comp.name in totally_common:
                     nullability[comp.name] |= copy(comp.nullable)
-                elif cls.how == 'outer' or (
-                        cls.how == 'left' and comp.name not in cls.reference_dataset.get_components_names()):
+                elif cls.how == "outer" or (
+                    cls.how == "left"
+                    and comp.name not in cls.reference_dataset.get_components_names()
+                ):
                     nullability[comp.name] = True
                 else:
                     nullability[comp.name] = copy(comp.nullable)
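
Note: the reflowed conditional on `comp.role` encodes a subtle rule for components named in `using`: they stay identifiers only when they are identifiers in every operand that carries them; otherwise an identifier is demoted to a measure and any other role is kept. A minimal sketch of that reading (this `Role` enum is a stand-in for vtlengine.Model.Role):

from enum import Enum
from typing import List

class Role(Enum):  # stand-in for vtlengine.Model.Role
    IDENTIFIER = "Identifier"
    MEASURE = "Measure"
    ATTRIBUTE = "Attribute"

def resolve_using_role(roles_across_operands: List[Role], current: Role) -> Role:
    # Mirrors: Role.IDENTIFIER if is_identifier
    #          else Role.MEASURE if comp.role == Role.IDENTIFIER else comp.role
    is_identifier = all(r == Role.IDENTIFIER for r in roles_across_operands)
    if is_identifier:
        return Role.IDENTIFIER
    return Role.MEASURE if current == Role.IDENTIFIER else current

assert resolve_using_role([Role.IDENTIFIER, Role.IDENTIFIER], Role.IDENTIFIER) == Role.IDENTIFIER
assert resolve_using_role([Role.IDENTIFIER, Role.MEASURE], Role.IDENTIFIER) == Role.MEASURE
assert resolve_using_role([Role.IDENTIFIER, Role.MEASURE], Role.ATTRIBUTE) == Role.ATTRIBUTE
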
@@ -77,12 +96,12 @@
                 component.nullable = nullability[component_name]

                 if component_name in common and component_name not in using:
-                    if component.role != Role.IDENTIFIER or cls.how == 'cross':
-                        new_name = f'{operand_name}#{component_name}'
+                    if component.role != Role.IDENTIFIER or cls.how == "cross":
+                        new_name = f"{operand_name}#{component_name}"
                         if new_name in merged_components:
                             raise SemanticError("1-1-13-9", comp_name=new_name)
                         while new_name in common:
-                            new_name += '_dup'
+                            new_name += "_dup"
                         merged_components[new_name] = component
                         merged_components[new_name].name = new_name
                     else:
@@ -90,18 +109,21 @@
                 else:
                     if component_name in using and component_name in merged_components:
                         data_type = binary_implicit_promotion(
-                            merged_components[component_name].data_type, component.data_type)
+                            merged_components[component_name].data_type, component.data_type
+                        )
                         component.data_type = data_type
                     merged_components[component_name] = component

         return merged_components

     @classmethod
-    def generate_result_components(cls, operands: List[Dataset], using=None) -> Dict[
-        str, Component]:
+    def generate_result_components(
+        cls, operands: List[Dataset], using: Optional[List[str]] = None
+    ) -> Dict[str, Component]:
         components = {}
         inter_identifiers = cls.get_components_intersection(
-            *[op.get_identifiers_names() for op in operands])
+            [op.get_identifiers_names() for op in operands]
+        )

         for op in operands:
             ids = op.get_identifiers_names()
@@ -112,7 +134,9 @@ class Join(Operator):
     @classmethod
     def evaluate(cls, operands: List[Dataset], using: List[str]) -> Dataset:
         result = cls.execute([copy(operand) for operand in operands], using)
-        if sorted(result.get_components_names()) != sorted(result.data.columns.tolist()):
+        if result.data is not None and sorted(result.get_components_names()) != sorted(
+            result.data.columns.tolist()
+        ):
             missing = list(set(result.get_components_names()) - set(result.data.columns.tolist()))
             if len(missing) == 0:
                 missing.append("None")
@@ -128,31 +152,49 @@ class Join(Operator):
             return result

         common_measures = cls.get_components_intersection(
-            *[op.get_measures_names() + op.get_attributes_names() for op in operands])
+            [op.get_measures_names() + op.get_attributes_names() for op in operands]
+        )
         for op in operands:
-            for column in op.data.columns.tolist():
-                if column in common_measures and column not in using:
-                    op.data = op.data.rename(columns={column: op.name + '#' + column})
+            if op.data is not None:
+                for column in op.data.columns.tolist():
+                    if column in common_measures and column not in using:
+                        op.data = op.data.rename(columns={column: op.name + "#" + column})
         result.data = copy(cls.reference_dataset.data)

         join_keys = using if using else result.get_identifiers_names()

         for op in operands:
             if op is not cls.reference_dataset:
-                merge_join_keys = [key for key in join_keys if key in op.data.columns.tolist()]
+                merge_join_keys = (
+                    [key for key in join_keys if key in op.data.columns.tolist()]
+                    if (op.data is not None)
+                    else []
+                )
                 if len(merge_join_keys) == 0:
                     raise SemanticError("1-1-13-14", name=op.name)
                 for join_key in merge_join_keys:
-                    _id_type_promotion_join_keys(result.get_component(join_key),
-                                                 op.get_component(join_key),
-                                                 join_key, result.data, op.data)
-                result.data = pd.merge(result.data, op.data, how=cls.how, on=merge_join_keys)
-
-        result.data.reset_index(drop=True, inplace=True)
+                    _id_type_promotion_join_keys(
+                        result.get_component(join_key),
+                        op.get_component(join_key),
+                        join_key,
+                        result.data,
+                        op.data,
+                    )
+                if op.data is not None and result.data is not None:
+                    result.data = pd.merge(
+                        result.data,
+                        op.data,
+                        how=cls.how,  # type: ignore[arg-type]
+                        on=merge_join_keys,
+                    )
+                else:
+                    result.data = pd.DataFrame()
+        if result.data is not None:
+            result.data.reset_index(drop=True, inplace=True)
         return result

     @classmethod
-    def validate(cls, operands: List[Dataset], using: List[str]) -> Dataset:
+    def validate(cls, operands: List[Dataset], using: Optional[List[str]]) -> Dataset:
         if len(operands) < 1 or sum([isinstance(op, Dataset) for op in operands]) < 1:
             raise Exception("Join operator requires at least 1 dataset")
         if not all([isinstance(op, Dataset) for op in operands]):
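
Note: `Dataset.data` is treated as `Optional[pandas.DataFrame]` throughout 1.0.1, which is why every `pd.merge`, `rename`, and `reset_index` above now sits behind an `is not None` check. A minimal sketch of the guard pattern (the `safe_merge` helper is illustrative, not part of the library):

from typing import List, Optional

import pandas as pd

def safe_merge(
    left: Optional[pd.DataFrame],
    right: Optional[pd.DataFrame],
    keys: List[str],
) -> pd.DataFrame:
    # Mirrors the guard added around pd.merge: only merge when both sides
    # are materialised; otherwise fall back to an empty frame, as the
    # else-branch in the hunk does with pd.DataFrame().
    if left is not None and right is not None:
        return pd.merge(left, right, how="inner", on=keys)
    return pd.DataFrame()

a = pd.DataFrame({"Id_1": [1, 2], "Me_1": ["a", "b"]})
b = pd.DataFrame({"Id_1": [2, 3], "Me_2": ["x", "y"]})
print(safe_merge(a, b, ["Id_1"]))     # one matching row (Id_1 == 2)
print(safe_merge(a, None, ["Id_1"]))  # empty DataFrame
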
@@ -162,8 +204,11 @@ class Join(Operator):
         for op in operands:
             if len(op.get_identifiers()) == 0:
                 raise SemanticError("1-3-27", op=cls.op)
-        cls.reference_dataset = max(operands, key=lambda x: len(
-            x.get_identifiers_names())) if cls.how not in ['cross', 'left'] else operands[0]
+        cls.reference_dataset = (
+            max(operands, key=lambda x: len(x.get_identifiers_names()))
+            if cls.how not in ["cross", "left"]
+            else operands[0]
+        )
         cls.identifiers_validation(operands, using)
         components = cls.merge_components(operands, using)
         if len(set(components.keys())) != len(components):
@@ -172,7 +217,7 @@
         return Dataset(name="result", components=components, data=None)

     @classmethod
-    def identifiers_validation(cls, operands: List[Dataset], using: List[str]) -> None:
+    def identifiers_validation(cls, operands: List[Dataset], using: Optional[List[str]]) -> None:

         # (Case A)
         info = {op.name: op.get_identifiers_names() for op in operands}
@@ -182,45 +227,60 @@ class Join(Operator):

         for op_name, identifiers in info.items():
             if op_name != cls.reference_dataset.name and not set(identifiers).issubset(
-                    set(info[cls.reference_dataset.name])):
+                set(info[cls.reference_dataset.name])
+            ):
                 if using is None:
                     missing_components = list(
-                        set(identifiers) - set(info[cls.reference_dataset.name]))
-                    raise SemanticError("1-1-13-11", op=cls.op,
-                                        dataset_reference=cls.reference_dataset.name,
-                                        component=missing_components[0])
+                        set(identifiers) - set(info[cls.reference_dataset.name])
+                    )
+                    raise SemanticError(
+                        "1-1-13-11",
+                        op=cls.op,
+                        dataset_reference=cls.reference_dataset.name,
+                        component=missing_components[0],
+                    )
         if using is None:
             return

         # (Case B1)
-        for op_name, identifiers in info.items():
-            if op_name != cls.reference_dataset.name and not set(identifiers).issubset(using):
-                raise SemanticError("1-1-13-4", op=cls.op, using_names=using, dataset=op_name)
-        reference_components = cls.reference_dataset.get_components_names()
-        if not set(using).issubset(reference_components):
-            raise SemanticError("1-1-13-6", op=cls.op, using_components=using,
-                                reference=cls.reference_dataset.name)
-
-        for op_name, identifiers in info.items():
-            if not set(using).issubset(identifiers):
-                # (Case B2)
-                if not set(using).issubset(reference_components):
-                    raise SemanticError("1-1-13-5", op=cls.op, using_names=using)
-                else:
-                    for op in operands:
-                        if op is not cls.reference_dataset:
-                            for component in using:
-                                if component not in op.get_components_names():
-                                    raise SemanticError("1-1-1-10", op=cls.op, comp_name=component,
-                                                        dataset_name=op.name)
+        if cls.reference_dataset is not None:
+            for op_name, identifiers in info.items():
+                if op_name != cls.reference_dataset.name and not set(identifiers).issubset(using):
+                    raise SemanticError("1-1-13-4", op=cls.op, using_names=using, dataset=op_name)
+            reference_components = cls.reference_dataset.get_components_names()
+            if not set(using).issubset(reference_components):
+                raise SemanticError(
+                    "1-1-13-6",
+                    op=cls.op,
+                    using_components=using,
+                    reference=cls.reference_dataset.name,
+                )
+
+            for op_name, identifiers in info.items():
+                if not set(using).issubset(identifiers):
+                    # (Case B2)
+                    if not set(using).issubset(reference_components):
+                        raise SemanticError("1-1-13-5", op=cls.op, using_names=using)
+                    else:
+                        for op in operands:
+                            if op is not cls.reference_dataset:
+                                for component in using:
+                                    if component not in op.get_components_names():
+                                        raise SemanticError(
+                                            "1-1-1-10",
+                                            op=cls.op,
+                                            comp_name=component,
+                                            dataset_name=op.name,
+                                        )


 class InnerJoin(Join):
-    how = 'inner'
+    how = "inner"

     @classmethod
-    def generate_result_components(cls, operands: List[Dataset], using=None) -> Dict[
-        str, Component]:
+    def generate_result_components(
+        cls, operands: List[Dataset], using: Optional[List[str]] = None
+    ) -> Dict[str, Component]:

         if using is None:
             return super().generate_result_components(operands, using)
@@ -228,57 +288,74 @@ class InnerJoin(Join):
         components = {}
         for op in operands:
             components.update(
-                {id: op.components[id] for id in using if id in op.get_measures_names()})
+                {id: op.components[id] for id in using if id in op.get_measures_names()}
+            )
         for op in operands:
             components.update({id: op.components[id] for id in op.get_identifiers_names()})
         return components


 class LeftJoin(Join):
-    how = 'left'
+    how = "left"


 class FullJoin(Join):
-    how = 'outer'
+    how = "outer"

     @classmethod
-    def identifiers_validation(cls, operands: List[Dataset], using=None) -> None:
+    def identifiers_validation(
+        cls, operands: List[Dataset], using: Optional[List[str]] = None
+    ) -> None:
         if using is not None:
             raise SemanticError("1-1-13-8", op=cls.op)
         for op in operands:
             if op is cls.reference_dataset:
                 continue
             if len(op.get_identifiers_names()) != len(
-                    cls.reference_dataset.get_identifiers_names()):
+                cls.reference_dataset.get_identifiers_names()
+            ):
                 raise SemanticError("1-1-13-13", op=cls.op)
             if op.get_identifiers_names() != cls.reference_dataset.get_identifiers_names():
                 raise SemanticError("1-1-13-12", op=cls.op)


 class CrossJoin(Join):
-    how = 'cross'
+    how = "cross"

     @classmethod
-    def execute(cls, operands: List[Dataset], using=None) -> Dataset:
+    def execute(cls, operands: List[Dataset], using: Optional[List[str]] = None) -> Dataset:
         result = cls.validate(operands, using)
         if len(operands) == 1:
             result.data = operands[0].data
             return result
-        common = cls.get_components_intersection(*[op.get_components_names() for op in operands])
+        common = cls.get_components_intersection([op.get_components_names() for op in operands])

         for op in operands:
+            if op.data is None:
+                op.data = pd.DataFrame(columns=op.get_components_names())
             if op is operands[0]:
                 result.data = op.data
             else:
-                result.data = pd.merge(result.data, op.data, how=cls.how)
-            result.data = result.data.rename(
-                columns={column: op.name + '#' + column for column in result.data.columns.tolist()
-                         if column in common})
-        result.data.reset_index(drop=True, inplace=True)
+                if result.data is not None:
+                    result.data = pd.merge(
+                        result.data, op.data, how=cls.how  # type: ignore[arg-type]
+                    )
+            if result.data is not None:
+                result.data = result.data.rename(
+                    columns={
+                        column: op.name + "#" + column
+                        for column in result.data.columns.tolist()
+                        if column in common
+                    }
+                )
+        if result.data is not None:
+            result.data.reset_index(drop=True, inplace=True)
         return result

     @classmethod
-    def identifiers_validation(cls, operands: List[Dataset], using=None) -> None:
+    def identifiers_validation(
+        cls, operands: List[Dataset], using: Optional[List[str]] = None
+    ) -> None:
         if using is not None:
             raise SemanticError("1-1-13-8", op=cls.op)

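Note: `CrossJoin.execute` relies on pandas' `how="cross"` merge, which takes no join keys and returns the Cartesian product; the new `op.data is None` branch substitutes an empty frame that still carries the dataset's component columns, so the merge and rename see a consistent schema. A quick illustration (the column names here are made up for the example):

import pandas as pd

left = pd.DataFrame({"A": [1, 2]})
right = pd.DataFrame({"B": ["x", "y"]})

# how="cross" (pandas >= 1.2) takes no "on" keys: every left row is
# paired with every right row, which is what a VTL cross_join needs.
crossed = pd.merge(left, right, how="cross")
print(crossed)
#    A  B
# 0  1  x
# 1  1  y
# 2  2  x
# 3  2  y

# The None fallback from the hunk, in isolation: an empty frame that
# still carries the dataset's component columns.
empty = pd.DataFrame(columns=["Id_1", "Me_1"])
print(pd.merge(empty, right, how="cross"))  # zero rows, merged schema
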
@@ -286,59 +363,81 @@ class CrossJoin(Join):
 class Apply(Operator):

     @classmethod
-    def evaluate(cls, dataset: Dataset, expression, op_map: dict):
+    def evaluate(cls, dataset: Dataset, expression: Any, op_map: Dict[str, Any]) -> Dataset:
         for child in expression:
             dataset = cls.execute(dataset, op_map[child.op], child.left.value, child.right.value)
         return dataset

     @classmethod
-    def execute(cls, dataset: Dataset, op, left: str, right: str) -> Dataset:
+    def execute(cls, dataset: Dataset, op: Any, left: str, right: str) -> Dataset:
         left_dataset = cls.create_dataset("left", left, dataset)
         right_dataset = cls.create_dataset("right", right, dataset)
         left_dataset, right_dataset = cls.get_common_components(left_dataset, right_dataset)
         return op.evaluate(left_dataset, right_dataset)

     @classmethod
-    def validate(cls, dataset: Dataset, child, op_map: dict) -> None:
+    def validate(cls, dataset: Dataset, child: Any, op_map: Dict[str, Any]) -> None:
         if not isinstance(child, BinOp):
             raise Exception(
-                f"Invalid expression {child} on apply operator. Only BinOp are accepted")
+                f"Invalid expression {child} on apply operator. Only BinOp are accepted"
+            )
         if child.op not in op_map:
             raise Exception(f"Operator {child.op} not implemented")
-        left_components = [comp.name[len(child.left.value) + 1] for comp in
-                           dataset.components.values() if
-                           comp.name.startswith(child.left.value)]
-        right_components = [comp.name[len(child.right.value) + 1] for comp in
-                            dataset.components.values() if
-                            comp.name.startswith(child.right.value)]
-        if len(set(left_components) & set(right_components)) == 0:
-            raise Exception(
-                f"{child.left.value} and {child.right.value} has not any match on dataset components")
+        if hasattr(child.left, "value") and hasattr(child.right, "value"):
+            left_components = [
+                comp.name[len(child.left.value) + 1]
+                for comp in dataset.components.values()
+                if comp.name.startswith(child.left.value)
+            ]
+            right_components = [
+                comp.name[len(child.right.value) + 1]
+                for comp in dataset.components.values()
+                if comp.name.startswith(child.right.value)
+            ]
+            if len(set(left_components) & set(right_components)) == 0:
+                raise Exception(
+                    f"{child.left.value} and {child.right.value} "
+                    f"has not any match on dataset components"
+                )

     @classmethod
     def create_dataset(cls, name: str, prefix: str, dataset: Dataset) -> Dataset:
-        prefix += '#'
-        components = {component.name: component for component in dataset.components.values() if
-                      component.name.startswith(prefix) or component.role is Role.IDENTIFIER}
-        data = dataset.data[list(components.keys())]
+        prefix += "#"
+        components = {
+            component.name: component
+            for component in dataset.components.values()
+            if component.name.startswith(prefix) or component.role is Role.IDENTIFIER
+        }
+        data = dataset.data[list(components.keys())] if dataset.data is not None else pd.DataFrame()

         for component in components.values():
-            component.name = component.name[len(prefix):] if (
-                    component.name.startswith(
-                        prefix) and component.role is not Role.IDENTIFIER) else component.name
+            component.name = (
+                component.name[len(prefix) :]
+                if (component.name.startswith(prefix) and component.role is not Role.IDENTIFIER)
+                else component.name
+            )
         components = {component.name: component for component in components.values()}
-        data.rename(columns={column: column[len(prefix):] for column in data.columns if
-                             column.startswith(prefix)},
-                    inplace=True)
+        data.rename(
+            columns={
+                column: column[len(prefix) :]
+                for column in data.columns
+                if column.startswith(prefix)
+            },
+            inplace=True,
+        )
         return Dataset(name=name, components=components, data=data)

     @classmethod
-    def get_common_components(cls, left: Dataset, right: Dataset) -> (Dataset, Dataset):
+    def get_common_components(
+        cls, left: Dataset, right: Dataset
+    ) -> (Dataset, Dataset):  # type: ignore[syntax]
         common = set(left.get_components_names()) & set(right.get_components_names())
-        left.components = {comp.name: comp for comp in left.components.values() if
-                           comp.name in common}
-        right.components = {comp.name: comp for comp in right.components.values() if
-                            comp.name in common}
-        left.data = left.data[list(common)]
-        right.data = right.data[list(common)]
+        left.components = {
+            comp.name: comp for comp in left.components.values() if comp.name in common
+        }
+        right.components = {
+            comp.name: comp for comp in right.components.values() if comp.name in common
+        }
+        left.data = left.data[list(common)] if left.data is not None else pd.DataFrame()
+        right.data = right.data[list(common)] if right.data is not None else pd.DataFrame()
         return left, right
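
Note: one typing wrinkle survives this hunk: `-> (Dataset, Dataset)` is a tuple literal, not a valid type expression, which is why the new code silences mypy with `# type: ignore[syntax]` instead of fixing the annotation. The conventional spelling, sketched with a stand-in class:

from typing import Tuple

class Dataset:  # minimal stand-in for vtlengine.Model.Dataset
    pass

# Standard annotation for a two-dataset return; the diff instead keeps the
# tuple literal and adds "# type: ignore[syntax]".
def get_common_components(left: Dataset, right: Dataset) -> Tuple[Dataset, Dataset]:
    return left, right
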