vtlengine 1.4.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. vtlengine/API/_InternalApi.py +791 -0
  2. vtlengine/API/__init__.py +612 -0
  3. vtlengine/API/data/schema/external_routines_schema.json +34 -0
  4. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  5. vtlengine/API/data/schema/value_domain_schema.json +97 -0
  6. vtlengine/AST/ASTComment.py +57 -0
  7. vtlengine/AST/ASTConstructor.py +598 -0
  8. vtlengine/AST/ASTConstructorModules/Expr.py +1928 -0
  9. vtlengine/AST/ASTConstructorModules/ExprComponents.py +995 -0
  10. vtlengine/AST/ASTConstructorModules/Terminals.py +790 -0
  11. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. vtlengine/AST/ASTDataExchange.py +10 -0
  13. vtlengine/AST/ASTEncoders.py +32 -0
  14. vtlengine/AST/ASTString.py +675 -0
  15. vtlengine/AST/ASTTemplate.py +558 -0
  16. vtlengine/AST/ASTVisitor.py +25 -0
  17. vtlengine/AST/DAG/__init__.py +479 -0
  18. vtlengine/AST/DAG/_words.py +10 -0
  19. vtlengine/AST/Grammar/Vtl.g4 +705 -0
  20. vtlengine/AST/Grammar/VtlTokens.g4 +409 -0
  21. vtlengine/AST/Grammar/__init__.py +0 -0
  22. vtlengine/AST/Grammar/lexer.py +2139 -0
  23. vtlengine/AST/Grammar/parser.py +16597 -0
  24. vtlengine/AST/Grammar/tokens.py +169 -0
  25. vtlengine/AST/VtlVisitor.py +824 -0
  26. vtlengine/AST/__init__.py +674 -0
  27. vtlengine/DataTypes/TimeHandling.py +562 -0
  28. vtlengine/DataTypes/__init__.py +863 -0
  29. vtlengine/DataTypes/_time_checking.py +135 -0
  30. vtlengine/Exceptions/__exception_file_generator.py +96 -0
  31. vtlengine/Exceptions/__init__.py +159 -0
  32. vtlengine/Exceptions/messages.py +1004 -0
  33. vtlengine/Interpreter/__init__.py +2048 -0
  34. vtlengine/Model/__init__.py +501 -0
  35. vtlengine/Operators/Aggregation.py +357 -0
  36. vtlengine/Operators/Analytic.py +455 -0
  37. vtlengine/Operators/Assignment.py +23 -0
  38. vtlengine/Operators/Boolean.py +106 -0
  39. vtlengine/Operators/CastOperator.py +451 -0
  40. vtlengine/Operators/Clause.py +366 -0
  41. vtlengine/Operators/Comparison.py +488 -0
  42. vtlengine/Operators/Conditional.py +495 -0
  43. vtlengine/Operators/General.py +191 -0
  44. vtlengine/Operators/HROperators.py +254 -0
  45. vtlengine/Operators/Join.py +447 -0
  46. vtlengine/Operators/Numeric.py +422 -0
  47. vtlengine/Operators/RoleSetter.py +77 -0
  48. vtlengine/Operators/Set.py +176 -0
  49. vtlengine/Operators/String.py +578 -0
  50. vtlengine/Operators/Time.py +1144 -0
  51. vtlengine/Operators/Validation.py +275 -0
  52. vtlengine/Operators/__init__.py +900 -0
  53. vtlengine/Utils/__Virtual_Assets.py +34 -0
  54. vtlengine/Utils/__init__.py +479 -0
  55. vtlengine/__extras_check.py +17 -0
  56. vtlengine/__init__.py +27 -0
  57. vtlengine/files/__init__.py +0 -0
  58. vtlengine/files/output/__init__.py +35 -0
  59. vtlengine/files/output/_time_period_representation.py +55 -0
  60. vtlengine/files/parser/__init__.py +240 -0
  61. vtlengine/files/parser/_rfc_dialect.py +22 -0
  62. vtlengine/py.typed +0 -0
  63. vtlengine-1.4.0rc2.dist-info/METADATA +89 -0
  64. vtlengine-1.4.0rc2.dist-info/RECORD +66 -0
  65. vtlengine-1.4.0rc2.dist-info/WHEEL +4 -0
  66. vtlengine-1.4.0rc2.dist-info/licenses/LICENSE.md +661 -0
vtlengine/Operators/Conditional.py
@@ -0,0 +1,495 @@
from copy import copy
from typing import Any, List, Union

import numpy as np

# if os.environ.get("SPARK", False):
#     import pyspark.pandas as pd
# else:
#     import pandas as pd
import pandas as pd

from vtlengine.DataTypes import (
    COMP_NAME_MAPPING,
    SCALAR_TYPES_CLASS_REVERSE,
    Boolean,
    Null,
    binary_implicit_promotion,
)
from vtlengine.Exceptions import SemanticError
from vtlengine.Model import DataComponent, Dataset, Role, Scalar
from vtlengine.Operators import Binary, Operator
from vtlengine.Utils.__Virtual_Assets import VirtualCounter


class If(Operator):
    """
    If class:
    `If-then-else <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=225&zoom=100,72,142>`_ operator.
    It inherits from Operator, a superclass that contains the general validate and
    evaluate class methods.

    Class methods:
        evaluate: Checks that the operation is well constructed, evaluates the
        boolean condition and returns the result, whose class depends on the
        class of the condition (DataComponent or Dataset).

        component_level_evaluation: Returns a pandas Series with the values
        selected by the condition.

        dataset_level_evaluation: Splits the condition dataset on its boolean
        measure and builds the result dataset, validating its schema.

        validate: Has two branches, one for DataComponent conditions and one for
        Dataset conditions. The DataComponent branch checks that the condition is
        a boolean Measure; the Dataset branch checks that the identifiers are the
        same in 'if', 'then' and 'else'.
    """  # noqa: E501

    @classmethod
    def evaluate(cls, condition: Any, true_branch: Any, false_branch: Any) -> Any:
        result = cls.validate(condition, true_branch, false_branch)
        if not isinstance(result, Scalar):
            if isinstance(condition, DataComponent):
                result.data = cls.component_level_evaluation(condition, true_branch, false_branch)
            if isinstance(condition, Dataset):
                result = cls.dataset_level_evaluation(result, condition, true_branch, false_branch)
        return result

    @classmethod
    def component_level_evaluation(
        cls, condition: DataComponent, true_branch: Any, false_branch: Any
    ) -> Any:
        result = None
        if condition.data is not None:
            if isinstance(true_branch, Scalar):
                true_data = pd.Series(true_branch.value, index=condition.data.index)
            else:
                true_data = true_branch.data.reindex(condition.data.index)
            if isinstance(false_branch, Scalar):
                false_data = pd.Series(false_branch.value, index=condition.data.index)
            else:
                false_data = false_branch.data.reindex(condition.data.index)
            condition.data = condition.data.fillna(False)
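            # np.where picks from true_data where the (null-filled) condition
            # holds and from false_data everywhere else.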
            result = np.where(condition.data, true_data, false_data)

        return pd.Series(result, index=condition.data.index)  # type: ignore[union-attr]

    @classmethod
    def dataset_level_evaluation(
        cls, result: Any, condition: Any, true_branch: Any, false_branch: Any
    ) -> Dataset:
        ids = condition.get_identifiers_names()
        condition_measure = condition.get_measures_names()[0]
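        # Split the condition rows on its single boolean measure: True rows feed
        # the then-branch, everything else (False or null) feeds the else-branch.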
        condition_mask = condition.data[condition_measure].fillna(False).astype(bool)
        true_data = condition.data[condition_mask]
        false_data = condition.data[~condition_mask]

        if isinstance(true_branch, Dataset):
            if len(true_data) > 0 and true_branch.data is not None:
                true_data = pd.merge(
                    true_data,
                    true_branch.data,
                    on=ids,
                    how="left",
                    suffixes=("_condition", ""),
                )
            else:
                true_data = pd.DataFrame(columns=true_branch.get_components_names())
        else:
            true_data[condition_measure] = true_data[condition_measure].apply(
                lambda x: true_branch.value
            )
        if isinstance(false_branch, Dataset):
            if len(false_data) > 0 and false_branch.data is not None:
                false_data = pd.merge(
                    false_data,
                    false_branch.data,
                    on=ids,
                    how="left",
                    suffixes=("_condition", ""),
                )
            else:
                false_data = pd.DataFrame(columns=false_branch.get_components_names())
        else:
            false_data[condition_measure] = false_data[condition_measure].apply(
                lambda x: false_branch.value
            )

        result.data = (
            pd.concat([true_data, false_data], ignore_index=True)
            .drop_duplicates()
            .sort_values(by=ids)
        ).reset_index(drop=True)
        if isinstance(result, Dataset):
            drop_columns = [
                column for column in result.data.columns if column not in result.components
            ]
            result.data = result.data.drop(columns=drop_columns)
        if isinstance(true_branch, Scalar) and isinstance(false_branch, Scalar):
            result.get_measures()[0].data_type = true_branch.data_type
            result.get_measures()[0].name = COMP_NAME_MAPPING[true_branch.data_type]
            if result.data is not None:
                result.data = result.data.rename(
                    columns={condition_measure: result.get_measures()[0].name}
                )
        return result

    @classmethod
    def validate(  # noqa: C901
        cls, condition: Any, true_branch: Any, false_branch: Any
    ) -> Union[Scalar, DataComponent, Dataset]:
        nullable = False
        left = true_branch
        right = false_branch
        dataset_name = VirtualCounter._new_ds_name()
        if true_branch.__class__ != false_branch.__class__:
            if (isinstance(true_branch, DataComponent) and isinstance(false_branch, Dataset)) or (
                isinstance(true_branch, Dataset) and isinstance(false_branch, DataComponent)
            ):
                raise ValueError(
                    "If-then-else operands cannot mix a dataset and a component"
                )
            if isinstance(true_branch, Scalar):
                left = false_branch
                right = true_branch

        # DataComponent
        comp_name = VirtualCounter._new_dc_name()
        if isinstance(condition, DataComponent):
            if condition.data_type != Boolean:
                raise SemanticError(
                    "1-1-9-11",
                    op=cls.op,
                    type=SCALAR_TYPES_CLASS_REVERSE[condition.data_type],
                )

            if (
                isinstance(left, Scalar)
                and isinstance(right, Scalar)
                and (left.data_type == Null or right.data_type == Null)
            ):
                nullable = True
            if isinstance(left, DataComponent) and isinstance(right, DataComponent):
                nullable = left.nullable or right.nullable
            elif isinstance(left, DataComponent):
                nullable = left.nullable or right.data_type == Null
            elif isinstance(right, DataComponent):
                nullable = left.data_type == Null or right.nullable
            return DataComponent(
                name=comp_name,
                data=None,
                data_type=binary_implicit_promotion(left.data_type, right.data_type),
                role=Role.MEASURE,
                nullable=nullable,
            )

        # Dataset
        if isinstance(left, Scalar) and isinstance(right, Scalar):
            raise SemanticError(
                "1-1-9-12", op=cls.op, then_symbol=left.name, else_symbol=right.name
            )
        if isinstance(left, DataComponent):
            raise SemanticError(
                "1-1-9-12", op=cls.op, then_symbol=left.name, else_symbol=right.name
            )
        if isinstance(left, Scalar):
            left.data_type = right.data_type = binary_implicit_promotion(
                left.data_type, right.data_type
            )
            return Dataset(name=dataset_name, components=copy(condition.components), data=None)
        if left.get_identifiers() != condition.get_identifiers():
            raise SemanticError("1-1-9-10", op=cls.op, clause=left.name)
        if isinstance(right, Scalar):
            for component in left.get_measures():
                if component.data_type != right.data_type:
                    component.data_type = binary_implicit_promotion(
                        component.data_type, right.data_type
                    )
        if isinstance(right, Dataset):
            if left.get_identifiers() != condition.get_identifiers():
                raise SemanticError("1-1-9-10", op=cls.op, clause=right.name)
            if left.get_components_names() != right.get_components_names():
                raise SemanticError("1-1-9-13", op=cls.op, then=left.name, else_clause=right.name)
            for component in left.get_measures():
                if component.data_type != right.components[component.name].data_type:
                    component.data_type = right.components[component.name].data_type = (
                        binary_implicit_promotion(
                            component.data_type,
                            right.components[component.name].data_type,
                        )
                    )
        if isinstance(condition, Dataset):
            if len(condition.get_measures()) != 1:
                raise SemanticError("1-1-9-4", op=cls.op, name=condition.name)
            if condition.get_measures()[0].data_type != Boolean:
                raise SemanticError(
                    "1-1-9-5",
                    op=cls.op,
                    type=SCALAR_TYPES_CLASS_REVERSE[condition.get_measures()[0].data_type],
                )
            if left.get_identifiers() != condition.get_identifiers():
                raise SemanticError("1-1-9-6", op=cls.op)
        result_components = {comp_name: copy(comp) for comp_name, comp in left.components.items()}
        return Dataset(name=dataset_name, components=result_components, data=None)


class Nvl(Binary):
    """
    Nvl class:
    `Nvl <https://sdmx.org/wp-content/uploads/VTL-2.1-Reference-Manual.pdf#page=229&zoom=100,72,370>`_ operator class.
    It has the following class methods:

    Class methods:
        validate: Validates whether the operation can be performed at scalar,
        datacomponent or dataset level.
        evaluate: Evaluates the actual operation, returning the result.
    """  # noqa: E501

    @classmethod
    def evaluate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
        result = cls.validate(left, right)

        if isinstance(left, Scalar) and isinstance(result, Scalar):
            if left.data_type is Null:
                result.data_type = right.data_type
                result.value = right.value
            elif right.data_type is Null:
                result.data_type = left.data_type
                result.value = left.value
            else:
                result.data_type = left.data_type
                result.value = left.value

        else:
            if not isinstance(result, Scalar):
                if isinstance(right, Scalar):
                    result.data = left.data.fillna(right.value)
                else:
                    result.data = left.data.fillna(right.data)
            if isinstance(result, Dataset):
                result.data = result.data[result.get_components_names()]
        return result

    @classmethod
    def validate(cls, left: Any, right: Any) -> Union[Scalar, DataComponent, Dataset]:
        dataset_name = VirtualCounter._new_ds_name()
        comp_name = VirtualCounter._new_dc_name()
        result_components = {}
        if isinstance(left, Scalar):
            if not isinstance(right, Scalar):
                raise ValueError(
                    "Nvl operation at scalar level must have scalar "
                    "types on right (applicable) side"
                )
            cls.type_validation(left.data_type, right.data_type)
            return Scalar(name="result", value=None, data_type=left.data_type)
        if isinstance(left, DataComponent):
            if isinstance(right, Dataset):
                raise ValueError(
                    "Nvl operation at component level cannot have "
                    "dataset type on right (applicable) side"
                )
            cls.type_validation(left.data_type, right.data_type)
            return DataComponent(
                name=comp_name,
                data=pd.Series(dtype=object),
                data_type=left.data_type,
                role=Role.MEASURE,
                nullable=False,
            )
        if isinstance(left, Dataset):
            if isinstance(right, DataComponent):
                raise ValueError(
                    "Nvl operation at dataset level cannot have component "
                    "type on right (applicable) side"
                )
            if isinstance(right, Scalar):
                for component in left.get_measures():
                    cls.type_validation(component.data_type, right.data_type)
            if isinstance(right, Dataset):
                for component in left.get_measures():
                    cls.type_validation(
                        component.data_type, right.components[component.name].data_type
                    )
            result_components = {
                comp_name: copy(comp)
                for comp_name, comp in left.components.items()
                if comp.role != Role.ATTRIBUTE
            }
            for comp in result_components.values():
                comp.nullable = False
        return Dataset(name=dataset_name, components=result_components, data=None)


class Case(Operator):
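    """
    Case class:
    Multi-branch (case-when) conditional operator.

    Class methods:
        validate: Checks that all conditions share one level (Scalar,
        DataComponent, or Dataset with a single boolean measure) and computes
        the promoted output type of the then/else operands.

        evaluate: Applies each condition mask in order over the then operands;
        rows (or the scalar value) matched by no condition are taken from the
        else operand.
    """
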
    @classmethod
    def evaluate(
        cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
    ) -> Union[Scalar, DataComponent, Dataset]:
        result = cls.validate(conditions, thenOps, elseOp)

        for condition in conditions:
            if isinstance(condition, Dataset) and condition.data is not None:
                condition.data.fillna(False, inplace=True)
                condition_measure = condition.get_measures_names()[0]
                if condition.data[condition_measure].dtype != bool:
                    condition.data[condition_measure] = condition.data[condition_measure].astype(
                        bool
                    )
            elif isinstance(condition, DataComponent) and condition.data is not None:
                condition.data.fillna(False, inplace=True)
                if condition.data.dtype != bool:
                    condition.data = condition.data.astype(bool)
            elif isinstance(condition, Scalar) and condition.value is None:
                condition.value = False

        if isinstance(result, Scalar):
            result.value = elseOp.value
            for i in range(len(conditions)):
                if conditions[i].value:
                    result.value = thenOps[i].value

        if isinstance(result, DataComponent):
            full_index = conditions[0].data.index
            result.data = pd.Series(None, index=full_index)
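
            # Conditions are applied in order and later matches overwrite earlier
            # ones, so the last true condition wins for each row; rows matched by
            # no condition are filled from the else operand below.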

            for i, condition in enumerate(conditions):
                if isinstance(thenOps[i], Scalar):
                    value_series = pd.Series(thenOps[i].value, index=full_index)
                else:
                    value_series = thenOps[i].data.reindex(full_index)
                cond_series = condition.data.reindex(full_index)
                cond_mask = cond_series.notna() & (cond_series == True)
                result_data = result.data.copy()
                result_data[cond_mask] = value_series[cond_mask]
                result.data = result_data

            conditions_stack = [c.data.reindex(full_index).fillna(False) for c in conditions]
            else_cond_mask = (
                ~np.logical_or.reduce(conditions_stack)
                if conditions_stack
                else pd.Series(True, index=full_index)
            )
            if isinstance(elseOp, Scalar):
                else_series = pd.Series(elseOp.value, index=full_index)
            else:
                else_series = elseOp.data.reindex(full_index)
            result.data[else_cond_mask] = else_series[else_cond_mask]

        elif isinstance(result, Dataset):
            identifiers = result.get_identifiers_names()
            columns = [col for col in result.get_components_names() if col not in identifiers]
            result.data = (
                conditions[0].data[identifiers]
                if conditions[0].data is not None
                else pd.DataFrame(columns=identifiers)
            ).copy()

            full_index = result.data.index
            for i in range(len(conditions)):
                condition = conditions[i]
                bool_col = next(x.name for x in condition.get_measures() if x.data_type == Boolean)
                cond_mask = condition.data[bool_col].reindex(full_index).astype(bool)

                if isinstance(thenOps[i], Scalar):
                    for col in columns:
                        result.data.loc[cond_mask, col] = thenOps[i].value
                else:
                    cond_df = thenOps[i].data.reindex(full_index)
                    result.data.loc[cond_mask, columns] = cond_df.loc[cond_mask, columns]

            then_cond_masks = [
                c.data[next(x.name for x in c.get_measures() if x.data_type == Boolean)]
                .reindex(full_index)
                .fillna(False)
                .astype(bool)
                for c in conditions
            ]
            else_cond_mask = (
                ~np.logical_or.reduce(then_cond_masks)
                if then_cond_masks
                else pd.Series(True, index=full_index)
            )

            if isinstance(elseOp, Scalar):
                for col in columns:
                    result.data.loc[else_cond_mask, col] = elseOp.value
            else:
                else_df = elseOp.data.reindex(full_index)
                result.data.loc[else_cond_mask, columns] = else_df.loc[else_cond_mask, columns]

        return result

    @classmethod
    def validate(
        cls, conditions: List[Any], thenOps: List[Any], elseOp: Any
    ) -> Union[Scalar, DataComponent, Dataset]:
        dataset_name = VirtualCounter._new_ds_name()
        comp_name = VirtualCounter._new_dc_name()
        if len(set(map(type, conditions))) > 1:
            raise SemanticError("2-1-9-1", op=cls.op)

        ops = thenOps + [elseOp]
        then_else_types = set(map(type, ops))
        condition_type = type(conditions[0])

        if condition_type is Scalar:
            for condition in conditions:
                if condition.data_type != Boolean:
                    raise SemanticError("2-1-9-2", op=cls.op, name=condition.name)
            if then_else_types != {Scalar}:
                raise SemanticError("2-1-9-3", op=cls.op)

            # The output data type is the data type of the last then operand with
            # a true condition, defaulting to the data type of the else operand
            # if no condition is true.
            output_data_type = elseOp.data_type
            for i in range(len(conditions)):
                if conditions[i].value:
                    output_data_type = thenOps[i].data_type

            return Scalar(
                name="result",
                value=None,
                data_type=output_data_type,
            )

        elif condition_type is DataComponent:
            for condition in conditions:
                if condition.data_type != Boolean:
                    raise SemanticError("2-1-9-4", op=cls.op, name=condition.name)

            nullable = any(
                (op.nullable if isinstance(op, DataComponent) else op.data_type == Null)
                for op in ops
            )
            data_type = ops[0].data_type
            for op in ops[1:]:
                data_type = binary_implicit_promotion(data_type, op.data_type)

            return DataComponent(
                name=comp_name,
                data=None,
                data_type=data_type,
                role=Role.MEASURE,
                nullable=nullable,
            )

        # Dataset
        for condition in conditions:
            if len(condition.get_measures_names()) != 1:
                raise SemanticError("1-1-1-4", op=cls.op)
            if condition.get_measures()[0].data_type != Boolean:
                raise SemanticError("2-1-9-5", op=cls.op, name=condition.name)

        if Dataset not in then_else_types:
            raise SemanticError("2-1-9-6", op=cls.op)

        components = next(op for op in ops if isinstance(op, Dataset)).components
        comp_names = [comp.name for comp in components.values()]
        for op in ops:
            if isinstance(op, Dataset) and op.get_components_names() != comp_names:
                raise SemanticError("2-1-9-7", op=cls.op)

        return Dataset(name=dataset_name, components=components, data=None)
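
A minimal usage sketch of the scalar branch of Case (editorial illustration, not
part of the package; it assumes an Integer scalar type exists in
vtlengine.DataTypes, alongside the Boolean type the module itself imports from
there):

    from vtlengine.DataTypes import Boolean, Integer
    from vtlengine.Model import Scalar
    from vtlengine.Operators.Conditional import Case

    conditions = [
        Scalar(name="c1", value=False, data_type=Boolean),
        Scalar(name="c2", value=True, data_type=Boolean),
    ]
    then_ops = [
        Scalar(name="t1", value=1, data_type=Integer),
        Scalar(name="t2", value=2, data_type=Integer),
    ]
    else_op = Scalar(name="e", value=0, data_type=Integer)

    result = Case.evaluate(conditions, then_ops, else_op)
    # result.value == 2: the last condition that holds wins; if none held,
    # the else value 0 would be returned.
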
vtlengine/Operators/General.py
@@ -0,0 +1,191 @@
import re
from typing import Any, Dict, List, Union

import duckdb
import pandas as pd

from vtlengine.DataTypes import COMP_NAME_MAPPING
from vtlengine.Exceptions import SemanticError
from vtlengine.Model import Component, DataComponent, Dataset, ExternalRoutine, Role
from vtlengine.Operators import Binary, Unary
from vtlengine.Utils.__Virtual_Assets import VirtualCounter


class Membership(Binary):
    """Membership operator class.

    It inherits from Binary class and has the following class methods:

    Class methods:
        validate: Checks that the component named by the right operand actually
        belongs to the left operand Dataset.
        evaluate: Runs validate and returns the resulting dataset with its data,
        keeping the identifiers and the selected component.
    """

    @classmethod
    def validate(cls, left_operand: Any, right_operand: Any) -> Dataset:
        dataset_name = VirtualCounter._new_ds_name()
        if right_operand not in left_operand.components:
            raise SemanticError(
                "1-1-1-10",
                op=cls.op,
                comp_name=right_operand,
                dataset_name=left_operand.name,
            )

        component = left_operand.components[right_operand]
        if component.role in (Role.IDENTIFIER, Role.ATTRIBUTE):
            right_operand = COMP_NAME_MAPPING[component.data_type]
            left_operand.components[right_operand] = Component(
                name=right_operand,
                data_type=component.data_type,
                role=Role.MEASURE,
                nullable=component.nullable,
            )
            if left_operand.data is not None:
                left_operand.data[right_operand] = left_operand.data[component.name]
        result_components = {
            name: comp
            for name, comp in left_operand.components.items()
            if comp.role == Role.IDENTIFIER or comp.name == right_operand
        }
        result_dataset = Dataset(name=dataset_name, components=result_components, data=None)
        return result_dataset

    @classmethod
    def evaluate(
        cls,
        left_operand: Dataset,
        right_operand: str,
        is_from_component_assignment: bool = False,
    ) -> Union[DataComponent, Dataset]:
        result_dataset = cls.validate(left_operand, right_operand)
        if left_operand.data is not None:
            if is_from_component_assignment:
                return DataComponent(
                    name=right_operand,
                    data_type=left_operand.components[right_operand].data_type,
                    role=Role.MEASURE,
                    nullable=left_operand.components[right_operand].nullable,
                    data=left_operand.data[right_operand],
                )
            result_dataset.data = left_operand.data[list(result_dataset.components.keys())]
        return result_dataset


class Alias(Binary):
    """Alias operator class.

    It inherits from Binary class and has the following class methods:

    Class methods:
        validate: Ensures the alias given as the right operand does not collide
        with an existing component name of the Dataset.
        evaluate: Returns the validated dataset with the left operand's data
        attached under the new name.
    """
+
84
+ @classmethod
85
+ def validate(cls, left_operand: Dataset, right_operand: Union[str, Dataset]) -> Dataset:
86
+ new_name = right_operand if isinstance(right_operand, str) else right_operand.name
87
+ if new_name != left_operand.name and new_name in left_operand.get_components_names():
88
+ raise SemanticError("1-3-1", alias=new_name)
89
+ return Dataset(name=new_name, components=left_operand.components, data=None)
90
+
91
+ @classmethod
92
+ def evaluate(cls, left_operand: Dataset, right_operand: Union[str, Dataset]) -> Dataset:
93
+ result = cls.validate(left_operand, right_operand)
94
+ result.data = left_operand.data
95
+ return result
96
+
97
+
98
+ class Eval(Unary):
99
+ """Eval operator class
100
+ It inherits from Unary class and has the following class methods
101
+
102
+ Class methods:
103
+ Validate: checks if the external routine name is the same as the operand name,
104
+ which must be a Dataset.
105
+ Evaluate: Checks if the operand and the output is actually a Dataset.
106
+
107
+ """

    @staticmethod
    def _execute_query(
        query: str, dataset_names: List[str], data: Dict[str, pd.DataFrame]
    ) -> pd.DataFrame:
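        # Rewrite double-quoted SQL literals as single-quoted ones; DuckDB
        # reserves double quotes for identifiers.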
        query = re.sub(r'"([^"]*)"', r"'\1'", query)
        for forbidden in ["INSTALL", "LOAD"]:
            if re.search(rf"\b{forbidden}\b", query, re.IGNORECASE):
                raise Exception(f"Query contains forbidden command: {forbidden}")
        if re.search(r"FROM\s+'https?://", query, re.IGNORECASE):
            raise Exception("Query contains forbidden URL in FROM clause")
        try:
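            # Harden the in-memory session: block external access, extension
            # loading and autoloading, then lock the configuration so the
            # user-supplied query cannot undo these settings.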
            conn = duckdb.connect(database=":memory:", read_only=False)
            conn.execute("SET enable_external_access = false")
            conn.execute("SET allow_unsigned_extensions = false")
            conn.execute("SET allow_community_extensions = false")
            conn.execute("SET autoinstall_known_extensions = false")
            conn.execute("SET autoload_known_extensions = false")
            conn.execute("SET lock_configuration = true")
        except Exception as e:
            raise Exception(f"Error connecting to DuckDB in memory: {e}") from e

        try:
            for ds_name in dataset_names:
                df = data[ds_name]
                conn.register(ds_name, df)
            df_result = conn.execute(query).fetchdf()
        except Exception as e:
            raise Exception(f"Error executing SQL query: {e}") from e
        finally:
            conn.close()
        return df_result

    @classmethod
    def validate(  # type: ignore[override]
        cls,
        operands: Dict[str, Dataset],
        external_routine: ExternalRoutine,
        output: Dataset,
    ) -> Dataset:
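        # Build empty frames that mirror each operand's schema; running the
        # routine over them type-checks the query output without real data.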
        empty_data_dict: Dict[str, pd.DataFrame] = {}
        for ds_name in external_routine.dataset_names:
            if ds_name not in operands:
                raise ValueError(
                    f"External Routine dataset {ds_name} is not present in Eval operands"
                )
            empty_data = pd.DataFrame(
                columns=[comp.name for comp in operands[ds_name].components.values()]
            )
            empty_data_dict[ds_name] = empty_data

        df = cls._execute_query(
            external_routine.query, external_routine.dataset_names, empty_data_dict
        )
        component_names = df.columns.tolist()
        for comp_name in component_names:
            if comp_name not in output.components:
                raise SemanticError(
                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=output.name
                )

        for comp_name in output.components:
            if comp_name not in component_names:
                raise ValueError(f"Component {comp_name} not found in External Routine result")

        output.name = external_routine.name

        return output

    @classmethod
    def evaluate(  # type: ignore[override]
        cls,
        operands: Dict[str, Dataset],
        external_routine: ExternalRoutine,
        output: Dataset,
    ) -> Dataset:
        result: Dataset = cls.validate(operands, external_routine, output)
        operands_data_dict = {ds_name: operands[ds_name].data for ds_name in operands}
        result.data = cls._execute_query(
            external_routine.query,
            external_routine.dataset_names,
            operands_data_dict,  # type: ignore[arg-type]
        )
        return result
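
A corresponding sketch for Membership (editorial; Integer and String scalar
types are assumed to exist in vtlengine.DataTypes, and the constructors below
mirror those used in the module):

    import pandas as pd

    from vtlengine.DataTypes import Integer, String
    from vtlengine.Model import Component, Dataset, Role
    from vtlengine.Operators.General import Membership

    ds = Dataset(
        name="DS_1",
        components={
            "Id_1": Component(name="Id_1", data_type=Integer, role=Role.IDENTIFIER, nullable=False),
            "Me_1": Component(name="Me_1", data_type=String, role=Role.MEASURE, nullable=True),
        },
        data=pd.DataFrame({"Id_1": [1, 2], "Me_1": ["A", "B"]}),
    )

    result = Membership.evaluate(ds, "Me_1")
    # result.data keeps the identifier Id_1 plus the selected measure Me_1.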