vtlengine 1.0-py3-none-any.whl → 1.0.2-py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of vtlengine might be problematic.

Files changed (56)
  1. vtlengine/API/_InternalApi.py +159 -102
  2. vtlengine/API/__init__.py +110 -68
  3. vtlengine/AST/ASTConstructor.py +188 -98
  4. vtlengine/AST/ASTConstructorModules/Expr.py +402 -205
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +248 -104
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
  7. vtlengine/AST/ASTEncoders.py +1 -1
  8. vtlengine/AST/ASTTemplate.py +24 -9
  9. vtlengine/AST/ASTVisitor.py +8 -12
  10. vtlengine/AST/DAG/__init__.py +43 -35
  11. vtlengine/AST/DAG/_words.py +4 -4
  12. vtlengine/AST/Grammar/Vtl.g4 +49 -20
  13. vtlengine/AST/Grammar/VtlTokens.g4 +13 -1
  14. vtlengine/AST/Grammar/lexer.py +2012 -1312
  15. vtlengine/AST/Grammar/parser.py +7524 -4343
  16. vtlengine/AST/Grammar/tokens.py +140 -128
  17. vtlengine/AST/VtlVisitor.py +16 -5
  18. vtlengine/AST/__init__.py +41 -11
  19. vtlengine/DataTypes/NumericTypesHandling.py +5 -4
  20. vtlengine/DataTypes/TimeHandling.py +196 -301
  21. vtlengine/DataTypes/__init__.py +304 -218
  22. vtlengine/Exceptions/__init__.py +96 -27
  23. vtlengine/Exceptions/messages.py +149 -69
  24. vtlengine/Interpreter/__init__.py +817 -497
  25. vtlengine/Model/__init__.py +172 -121
  26. vtlengine/Operators/Aggregation.py +156 -95
  27. vtlengine/Operators/Analytic.py +167 -79
  28. vtlengine/Operators/Assignment.py +7 -4
  29. vtlengine/Operators/Boolean.py +27 -32
  30. vtlengine/Operators/CastOperator.py +177 -131
  31. vtlengine/Operators/Clause.py +137 -99
  32. vtlengine/Operators/Comparison.py +148 -117
  33. vtlengine/Operators/Conditional.py +290 -98
  34. vtlengine/Operators/General.py +68 -47
  35. vtlengine/Operators/HROperators.py +91 -72
  36. vtlengine/Operators/Join.py +217 -118
  37. vtlengine/Operators/Numeric.py +129 -46
  38. vtlengine/Operators/RoleSetter.py +16 -15
  39. vtlengine/Operators/Set.py +61 -36
  40. vtlengine/Operators/String.py +213 -139
  41. vtlengine/Operators/Time.py +467 -215
  42. vtlengine/Operators/Validation.py +117 -76
  43. vtlengine/Operators/__init__.py +340 -213
  44. vtlengine/Utils/__init__.py +232 -41
  45. vtlengine/__init__.py +1 -1
  46. vtlengine/files/output/__init__.py +15 -6
  47. vtlengine/files/output/_time_period_representation.py +10 -9
  48. vtlengine/files/parser/__init__.py +79 -52
  49. vtlengine/files/parser/_rfc_dialect.py +6 -5
  50. vtlengine/files/parser/_time_checking.py +48 -37
  51. vtlengine-1.0.2.dist-info/METADATA +245 -0
  52. vtlengine-1.0.2.dist-info/RECORD +58 -0
  53. {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/WHEEL +1 -1
  54. vtlengine-1.0.dist-info/METADATA +0 -104
  55. vtlengine-1.0.dist-info/RECORD +0 -58
  56. {vtlengine-1.0.dist-info → vtlengine-1.0.2.dist-info}/LICENSE.md +0 -0
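
The hunks reproduced below appear to come from vtlengine/Operators/Set.py (entry 39 in the list above); the diffs for the remaining files are not shown.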
@@ -1,12 +1,12 @@
-import os
-from typing import List
+from typing import List, Any, Dict
 
 from vtlengine.Exceptions import SemanticError
 
-if os.environ.get("SPARK"):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK"):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
 
 from vtlengine.Model import Dataset
 from vtlengine.Operators import Operator
@@ -18,18 +18,22 @@ class Set(Operator):
     @classmethod
     def check_same_structure(cls, dataset_1: Dataset, dataset_2: Dataset) -> None:
         if len(dataset_1.components) != len(dataset_2.components):
-            raise SemanticError("1-1-17-1", op=cls.op, dataset_1=dataset_1.name,
-                                dataset_2=dataset_2.name)
+            raise SemanticError(
+                "1-1-17-1", op=cls.op, dataset_1=dataset_1.name, dataset_2=dataset_2.name
+            )
 
         for comp in dataset_1.components.values():
             if comp.name not in dataset_2.components:
                 raise Exception(f"Component {comp.name} not found in dataset {dataset_2.name}")
             second_comp = dataset_2.components[comp.name]
-            binary_implicit_promotion(comp.data_type, second_comp.data_type, cls.type_to_check,
-                                      cls.return_type)
+            binary_implicit_promotion(
+                comp.data_type, second_comp.data_type, cls.type_to_check, cls.return_type
+            )
             if comp.role != second_comp.role:
-                raise Exception(f"Component {comp.name} has different roles "
-                                f"in datasets {dataset_1.name} and {dataset_2.name}")
+                raise Exception(
+                    f"Component {comp.name} has different roles "
+                    f"in datasets {dataset_1.name} and {dataset_2.name}"
+                )
 
     @classmethod
     def validate(cls, operands: List[Dataset]) -> Dataset:
@@ -38,7 +42,7 @@ class Set(Operator):
         for operand in operands[1:]:
             cls.check_same_structure(base_operand, operand)
 
-        result_components = {}
+        result_components: Dict[str, Any] = {}
         for operand in operands:
             if len(result_components) == 0:
                 result_components = operand.components
@@ -46,7 +50,8 @@ class Set(Operator):
                 for comp_name, comp in operand.components.items():
                     current_comp = result_components[comp_name]
                     result_components[comp_name].data_type = binary_implicit_promotion(
-                        current_comp.data_type, comp.data_type)
+                        current_comp.data_type, comp.data_type
+                    )
                     result_components[comp_name].nullable = current_comp.nullable or comp.nullable
 
         result = Dataset(name="result", components=result_components, data=None)
@@ -58,10 +63,9 @@ class Union(Set):
     def evaluate(cls, operands: List[Dataset]) -> Dataset:
         result = cls.validate(operands)
         all_datapoints = [ds.data for ds in operands]
-        result.data = pd.concat(all_datapoints, sort=True,
-                                ignore_index=True)
+        result.data = pd.concat(all_datapoints, sort=True, ignore_index=True)
         identifiers_names = result.get_identifiers_names()
-        result.data = result.data.drop_duplicates(subset=identifiers_names, keep='first')
+        result.data = result.data.drop_duplicates(subset=identifiers_names, keep="first")
         result.data.reset_index(drop=True, inplace=True)
         return result
 
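A side note on Union's recipe above (concat, then drop_duplicates on the identifier columns with keep="first"): when the same identifier combination appears in several operands, the datapoint from the earliest operand wins. A minimal standalone sketch in plain pandas; the column names Id_1 and Me_1 are illustrative, not taken from the package:

import pandas as pd

# Two toy operands sharing identifier column "Id_1".
left = pd.DataFrame({"Id_1": [1, 2], "Me_1": [10, 20]})
right = pd.DataFrame({"Id_1": [2, 3], "Me_1": [99, 30]})

# Same recipe as Union.evaluate: concatenate everything, then keep the
# first row seen for each identifier combination.
data = pd.concat([left, right], sort=True, ignore_index=True)
data = data.drop_duplicates(subset=["Id_1"], keep="first").reset_index(drop=True)
print(data)  # Id_1=2 keeps Me_1=20, i.e. the left operand wins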
@@ -76,16 +80,22 @@ class Intersection(Set):
             if result.data is None:
                 result.data = data
             else:
-                result.data = result.data.merge(data, how='inner',
-                                                on=result.get_identifiers_names())
+                if data is None:
+                    result.data = pd.DataFrame(columns=result.get_identifiers_names())
+                    break
+                result.data = result.data.merge(
+                    data, how="inner", on=result.get_identifiers_names()
+                )
 
-        not_identifiers = [col for col in result.get_measures_names() +
-                           result.get_attributes_names()]
+        not_identifiers = [
+            col for col in result.get_measures_names() + result.get_attributes_names()
+        ]
 
         for col in not_identifiers:
             result.data[col] = result.data[col + "_x"]
         result.data = result.data[result.get_identifiers_names() + not_identifiers]
-        result.data.reset_index(drop=True, inplace=True)
+        if result.data is not None:
+            result.data.reset_index(drop=True, inplace=True)
         return result
 
 
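For readers unfamiliar with the "_x" bookkeeping in Intersection above: when two frames with overlapping non-key columns are merged, pandas appends its default "_x"/"_y" suffixes, and the code keeps the left operand's values by copying col + "_x" back into col. A small sketch under the same illustrative column names as before:

import pandas as pd

left = pd.DataFrame({"Id_1": [1, 2], "Me_1": [10, 20]})
right = pd.DataFrame({"Id_1": [2, 3], "Me_1": [99, 30]})
merged = left.merge(right, how="inner", on=["Id_1"])
print(list(merged.columns))  # ['Id_1', 'Me_1_x', 'Me_1_y']
merged["Me_1"] = merged["Me_1_x"]  # keep the left operand's measure
print(merged[["Id_1", "Me_1"]])   # one row: Id_1=2, Me_1=20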
@@ -96,35 +106,46 @@ class Symdiff(Set):
         result = cls.validate(operands)
         all_datapoints = [ds.data for ds in operands]
         for data in all_datapoints:
+            if data is None:
+                data = pd.DataFrame(columns=result.get_identifiers_names())
             if result.data is None:
                 result.data = data
             else:
                 # Performs the equivalent operation in pyspark.pandas
-                result.data = result.data.merge(data, how='outer',
-                                                on=result.get_identifiers_names(),
-                                                suffixes=('_x', '_y'))
+                result.data = result.data.merge(
+                    data, how="outer", on=result.get_identifiers_names(), suffixes=("_x", "_y")
+                )
 
                 for measure in result.get_measures_names():
-                    result.data['_merge'] = result.data.apply(
-                        lambda row: 'left_only' if pd.isnull(row[measure + '_y']) else (
-                            'right_only' if pd.isnull(row[measure + '_x']) else 'both'),
-                        axis=1
+                    result.data["_merge"] = result.data.apply(
+                        lambda row: (
+                            "left_only"
+                            if pd.isnull(row[measure + "_y"])
+                            else ("right_only" if pd.isnull(row[measure + "_x"]) else "both")
+                        ),
+                        axis=1,
                     )
 
                 not_identifiers = result.get_measures_names() + result.get_attributes_names()
                 for col in not_identifiers:
                     result.data[col] = result.data.apply(
-                        lambda x, c=col: x[c + '_x'] if x['_merge'] == 'left_only' else (
-                            x[c + '_y'] if x['_merge'] == 'right_only' else None), axis=1)
+                        lambda x, c=col: (
+                            x[c + "_x"]
+                            if x["_merge"] == "left_only"
+                            else (x[c + "_y"] if x["_merge"] == "right_only" else None)
+                        ),
+                        axis=1,
+                    )
         result.data = result.data[result.get_identifiers_names() + not_identifiers].dropna()
-        result.data = result.data.reset_index(drop=True)
+        if result.data is not None:
+            result.data = result.data.reset_index(drop=True)
         return result
 
 
 class Setdiff(Set):
 
     @staticmethod
-    def has_null(row):
+    def has_null(row: Any) -> bool:
         return row.isnull().any()
 
     @classmethod
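The reformatted lambdas in the Symdiff hunk implement a provenance test: after the outer merge, a row whose "_y" measure is null only existed in the left operand, and vice versa; rows flagged "both" are removed later by the dropna() call. In miniature, with the same toy frames:

import pandas as pd

left = pd.DataFrame({"Id_1": [1, 2], "Me_1": [10, 20]})
right = pd.DataFrame({"Id_1": [2, 3], "Me_1": [99, 30]})
m = left.merge(right, how="outer", on=["Id_1"], suffixes=("_x", "_y"))
m["_merge"] = m.apply(
    lambda row: (
        "left_only"
        if pd.isnull(row["Me_1_y"])
        else ("right_only" if pd.isnull(row["Me_1_x"]) else "both")
    ),
    axis=1,
)
print(m[["Id_1", "_merge"]])  # 1: left_only, 2: both, 3: right_only

Note the test infers provenance from measure nullness, so a datapoint whose measure is genuinely null on one side would be classified as one-sided.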
@@ -135,12 +156,15 @@ class Setdiff(Set):
             if result.data is None:
                 result.data = data
             else:
+                if data is None:
+                    data = pd.DataFrame(columns=result.get_identifiers_names())
                 result.data = result.data.merge(data, how="left", on=result.get_identifiers_names())
                 if len(result.data) > 0:
                     result.data = result.data[result.data.apply(cls.has_null, axis=1)]
 
-        not_identifiers = [col for col in result.get_measures_names() +
-                           result.get_attributes_names()]
+        not_identifiers = [
+            col for col in result.get_measures_names() + result.get_attributes_names()
+        ]
         for col in not_identifiers:
             if col + "_x" in result.data:
                 result.data[col] = result.data[col + "_x"]
@@ -148,5 +172,6 @@ class Setdiff(Set):
             if col + "_y" in result.data:
                 del result.data[col + "_y"]
         result.data = result.data[result.get_identifiers_names() + not_identifiers]
-        result.data.reset_index(drop=True, inplace=True)
+        if result.data is not None:
+            result.data.reset_index(drop=True, inplace=True)
         return result
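
The one behavioral thread running through these hunks is a guard for operands whose data is None: Intersection, Symdiff and Setdiff now substitute an empty frame over the identifier columns before merging (and Intersection breaks out early, since intersecting with an empty operand is empty). A minimal sketch of that guard, assuming plain pandas; as_frame is a hypothetical helper, not part of vtlengine:

import pandas as pd

def as_frame(data, identifiers):
    # Mirrors the 1.0.2 guard: a dataset with no datapoints becomes an
    # empty frame over the identifier columns. Typed empty Series keep the
    # merge dtype-compatible; the library itself uses
    # pd.DataFrame(columns=...).
    if data is not None:
        return data
    return pd.DataFrame({c: pd.Series(dtype="int64") for c in identifiers})

ids = ["Id_1"]
populated = pd.DataFrame({"Id_1": [1, 2], "Me_1": [10, 20]})
empty = as_frame(None, ids)

# An inner merge against the empty frame is empty, which is why
# Intersection.evaluate can return early with it as the result.
print(populated.merge(empty, how="inner", on=ids).empty)  # True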