vtlengine 1.0.0-py3-none-any.whl → 1.0.1-py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (54)
  1. vtlengine/API/_InternalApi.py +153 -100
  2. vtlengine/API/__init__.py +109 -67
  3. vtlengine/AST/ASTConstructor.py +188 -98
  4. vtlengine/AST/ASTConstructorModules/Expr.py +306 -200
  5. vtlengine/AST/ASTConstructorModules/ExprComponents.py +172 -102
  6. vtlengine/AST/ASTConstructorModules/Terminals.py +158 -95
  7. vtlengine/AST/ASTEncoders.py +1 -1
  8. vtlengine/AST/ASTTemplate.py +8 -9
  9. vtlengine/AST/ASTVisitor.py +8 -12
  10. vtlengine/AST/DAG/__init__.py +43 -35
  11. vtlengine/AST/DAG/_words.py +4 -4
  12. vtlengine/AST/Grammar/lexer.py +732 -142
  13. vtlengine/AST/Grammar/parser.py +2188 -826
  14. vtlengine/AST/Grammar/tokens.py +128 -128
  15. vtlengine/AST/VtlVisitor.py +7 -4
  16. vtlengine/AST/__init__.py +22 -11
  17. vtlengine/DataTypes/NumericTypesHandling.py +5 -4
  18. vtlengine/DataTypes/TimeHandling.py +194 -301
  19. vtlengine/DataTypes/__init__.py +304 -218
  20. vtlengine/Exceptions/__init__.py +52 -27
  21. vtlengine/Exceptions/messages.py +134 -62
  22. vtlengine/Interpreter/__init__.py +781 -487
  23. vtlengine/Model/__init__.py +165 -121
  24. vtlengine/Operators/Aggregation.py +156 -95
  25. vtlengine/Operators/Analytic.py +115 -59
  26. vtlengine/Operators/Assignment.py +7 -4
  27. vtlengine/Operators/Boolean.py +27 -32
  28. vtlengine/Operators/CastOperator.py +177 -131
  29. vtlengine/Operators/Clause.py +137 -99
  30. vtlengine/Operators/Comparison.py +148 -117
  31. vtlengine/Operators/Conditional.py +149 -98
  32. vtlengine/Operators/General.py +68 -47
  33. vtlengine/Operators/HROperators.py +91 -72
  34. vtlengine/Operators/Join.py +217 -118
  35. vtlengine/Operators/Numeric.py +89 -44
  36. vtlengine/Operators/RoleSetter.py +16 -15
  37. vtlengine/Operators/Set.py +61 -36
  38. vtlengine/Operators/String.py +213 -139
  39. vtlengine/Operators/Time.py +334 -216
  40. vtlengine/Operators/Validation.py +117 -76
  41. vtlengine/Operators/__init__.py +340 -213
  42. vtlengine/Utils/__init__.py +195 -40
  43. vtlengine/__init__.py +1 -1
  44. vtlengine/files/output/__init__.py +15 -6
  45. vtlengine/files/output/_time_period_representation.py +10 -9
  46. vtlengine/files/parser/__init__.py +77 -52
  47. vtlengine/files/parser/_rfc_dialect.py +6 -5
  48. vtlengine/files/parser/_time_checking.py +46 -37
  49. vtlengine-1.0.1.dist-info/METADATA +236 -0
  50. vtlengine-1.0.1.dist-info/RECORD +58 -0
  51. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/WHEEL +1 -1
  52. vtlengine-1.0.dist-info/METADATA +0 -104
  53. vtlengine-1.0.dist-info/RECORD +0 -58
  54. {vtlengine-1.0.dist-info → vtlengine-1.0.1.dist-info}/LICENSE.md +0 -0
vtlengine/Operators/Analytic.py
@@ -1,4 +1,3 @@
-import os
 from copy import copy
 from typing import List, Optional
 
@@ -6,21 +5,33 @@ import duckdb
 
 from vtlengine.Exceptions import SemanticError
 
-if os.environ.get("SPARK"):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK"):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
 
 import vtlengine.Operators as Operator
 from vtlengine.AST import OrderBy, Windowing
-from vtlengine.AST.Grammar.tokens import AVG, COUNT, FIRST_VALUE, LAG, LAST_VALUE, LEAD, MAX, \
-    MEDIAN, MIN, \
-    RANK, RATIO_TO_REPORT, STDDEV_POP, \
-    STDDEV_SAMP, \
-    SUM, VAR_POP, \
-    VAR_SAMP
-from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, \
-    unary_implicit_promotion
+from vtlengine.AST.Grammar.tokens import (
+    AVG,
+    COUNT,
+    FIRST_VALUE,
+    LAG,
+    LAST_VALUE,
+    LEAD,
+    MAX,
+    MEDIAN,
+    MIN,
+    RANK,
+    RATIO_TO_REPORT,
+    STDDEV_POP,
+    STDDEV_SAMP,
+    SUM,
+    VAR_POP,
+    VAR_SAMP,
+)
+from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
 from vtlengine.Model import Component, Dataset, Role
 
 
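A note on the removed toggle: os.environ.get("SPARK") returns a string, so any non-empty value (including "0" or "false") would have enabled the Spark branch. A standalone sketch of that gotcha, not taken from the package:

    import os

    os.environ["SPARK"] = "false"   # user intends "disabled"
    if os.environ.get("SPARK"):     # any non-empty string is truthy
        print("Spark branch taken anyway")
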
@@ -36,14 +47,18 @@ class Analytic(Operator.Unary):
     analyticfunc: Specify class method that returns a dataframe using the duckdb library.
     Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
     """
-    sql_op = None
+
+    sql_op: Optional[str] = None
 
     @classmethod
-    def validate(cls, operand: Dataset,
-                 partitioning: List[str],
-                 ordering: Optional[List[OrderBy]],
-                 window: Optional[Windowing],
-                 params: Optional[List[int]]) -> Dataset:
+    def validate(  # type: ignore[override]
+        cls,
+        operand: Dataset,
+        partitioning: List[str],
+        ordering: Optional[List[OrderBy]],
+        window: Optional[Windowing],
+        params: Optional[List[int]],
+    ) -> Dataset:
         if ordering is None:
             order_components = []
         else:
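The new "# type: ignore[override]" is needed because the subclass widens an inherited classmethod's signature, which mypy flags as an incompatible override. A hypothetical minimal reproduction (the base signature shown is illustrative, not the actual Operator.Unary one):

    class Base:
        @classmethod
        def validate(cls, operand: object) -> object:
            return operand

    class Child(Base):
        @classmethod
        def validate(cls, operand: object, extra: int) -> object:  # mypy: incompatible override
            return operand
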
@@ -53,15 +68,21 @@ class Analytic(Operator.Unary):
 
         for comp_name in partitioning:
             if comp_name not in operand.components:
-                raise SemanticError("1-1-1-10", op=cls.op, comp_name=comp_name,
-                                    dataset_name=operand.name)
+                raise SemanticError(
+                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                )
             if comp_name not in identifier_names:
-                raise SemanticError("1-1-3-2", op=cls.op, id_name=comp_name,
-                                    id_type=operand.components[comp_name].role)
+                raise SemanticError(
+                    "1-1-3-2",
+                    op=cls.op,
+                    id_name=comp_name,
+                    id_type=operand.components[comp_name].role,
+                )
         for comp_name in order_components:
             if comp_name not in operand.components:
-                raise SemanticError("1-1-1-10", op=cls.op, comp_name=comp_name,
-                                    dataset_name=operand.name)
+                raise SemanticError(
+                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                )
         measures = operand.get_measures()
         if measures is None:
             raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
@@ -79,24 +100,26 @@
         if len(measures) == 1:
             del result_components[measures[0].name]
         result_components[measure_name] = Component(
-            name=measure_name,
-            data_type=cls.return_type,
-            role=Role.MEASURE,
-            nullable=nullable
+            name=measure_name, data_type=cls.return_type, role=Role.MEASURE, nullable=nullable
         )
 
         return Dataset(name="result", components=result_components, data=None)
 
     @classmethod
-    def analyticfunc(cls, df: pd.DataFrame, partitioning: List[str],
-                     identifier_names: List[str],
-                     measure_names: List[str],
-                     ordering: List[OrderBy],
-                     window: Optional[Windowing],
-                     params: Optional[List[int]] = None):
+    def analyticfunc(
+        cls,
+        df: pd.DataFrame,
+        partitioning: List[str],
+        identifier_names: List[str],
+        measure_names: List[str],
+        ordering: List[OrderBy],
+        window: Optional[Windowing],
+        params: Optional[List[int]] = None,
+    ) -> pd.DataFrame:
         """Annotation class
 
-        It is used to analyze the attributes specified bellow ensuring that the type of data is the correct one to perform
+        It is used to analyze the attributes specified bellow
+        ensuring that the type of data is the correct one to perform
         the operation.
 
         Attributes:
@@ -110,18 +133,26 @@
         window_str = ""
         if window is not None:
             mode = "ROWS" if window.type_ == "data" else "RANGE"
-            start_mode = window.start_mode if window.start_mode != 'current' and window.start != 'CURRENT ROW' else ''
-            stop_mode = window.stop_mode if window.stop_mode != 'current' and window.stop != 'CURRENT ROW' else ''
-            if window.start == -1:
-                window.start = 'UNBOUNDED'
+            start_mode = (
+                window.start_mode
+                if window.start_mode != "current" and window.start != "CURRENT ROW"
+                else ""
+            )
+            stop_mode = (
+                window.stop_mode
+                if window.stop_mode != "current" and window.stop != "CURRENT ROW"
+                else ""
+            )
+            if isinstance(window.start, int) and window.start == -1:
+                window.start = "UNBOUNDED"
 
-            if stop_mode == '' and window.stop == 0:
-                window.stop = 'CURRENT ROW'
+            if stop_mode == "" and window.stop == 0:
+                window.stop = "CURRENT ROW"
             window_str = f"{mode} BETWEEN {window.start} {start_mode} AND {window.stop} {stop_mode}"
 
         # Partitioning
         if len(partitioning) > 0:
-            partition = "PARTITION BY " + ', '.join(partitioning)
+            partition = "PARTITION BY " + ", ".join(partitioning)
         else:
             partition = ""
 
@@ -143,7 +174,7 @@
         elif cls.op == RATIO_TO_REPORT:
             measure_query = f"CAST({measure} AS REAL) / SUM(CAST({measure} AS REAL))"
         elif cls.op in [LAG, LEAD]:
-            measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params))})"
+            measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params or []))})"
         else:
             measure_query = f"{cls.sql_op}({measure})"
         if cls.op == COUNT and len(measure_names) == 1:
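The "params or []" guard matters because params is Optional: map(str, None) raises TypeError before the query string is even built. A quick standalone check:

    params = None
    # ",".join(map(str, params))             # TypeError: 'NoneType' object is not iterable
    print(",".join(map(str, params or [])))  # "", so LAG/LEAD still need a real offset upstream
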
@@ -153,33 +184,42 @@
             measure_queries.append(measure_query)
         if cls.op == COUNT and len(measure_names) == 0:
             measure_queries.append(
-                f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}")
+                f"COUNT(*) {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+            )
 
-        measures_sql = ', '.join(measure_queries)
-        identifiers_sql = ', '.join(identifier_names)
+        measures_sql = ", ".join(measure_queries)
+        identifiers_sql = ", ".join(identifier_names)
         query = f"SELECT {identifiers_sql} , {measures_sql} FROM df"
 
         if cls.op == COUNT:
             df[measure_names] = df[measure_names].fillna(-1)
-        if os.getenv("SPARK", False):
-            df = df.to_pandas()
+        # if os.getenv("SPARK", False):
+        #     df = df.to_pandas()
         return duckdb.query(query).to_df()
 
     @classmethod
-    def evaluate(cls, operand: Dataset,
-                 partitioning: List[str],
-                 ordering: Optional[List[OrderBy]],
-                 window: Optional[Windowing],
-                 params: Optional[List[int]]) -> Dataset:
+    def evaluate(  # type: ignore[override]
+        cls,
+        operand: Dataset,
+        partitioning: List[str],
+        ordering: Optional[List[OrderBy]],
+        window: Optional[Windowing],
+        params: Optional[List[int]],
+    ) -> Dataset:
         result = cls.validate(operand, partitioning, ordering, window, params)
-        df = operand.data.copy()
+        df = operand.data.copy() if operand.data is not None else pd.DataFrame()
         measure_names = operand.get_measures_names()
         identifier_names = operand.get_identifiers_names()
 
-        result.data = cls.analyticfunc(df=df, partitioning=partitioning,
-                                       identifier_names=identifier_names,
-                                       measure_names=measure_names,
-                                       ordering=ordering, window=window, params=params)
+        result.data = cls.analyticfunc(
+            df=df,
+            partitioning=partitioning,
+            identifier_names=identifier_names,
+            measure_names=measure_names,
+            ordering=ordering or [],
+            window=window,
+            params=params,
+        )
         return result
 
 
@@ -187,6 +227,7 @@ class Max(Analytic):
     """
     Max operator
     """
+
     op = MAX
     sql_op = "MAX"
 
@@ -195,6 +236,7 @@ class Min(Analytic):
     """
     Min operator
     """
+
     op = MIN
     sql_op = "MIN"
 
@@ -203,6 +245,7 @@ class Sum(Analytic):
     """
     Sum operator
     """
+
     op = SUM
     type_to_check = Number
     return_type = Number
@@ -213,6 +256,7 @@ class Count(Analytic):
     """
     Count operator
     """
+
     op = COUNT
     type_to_check = None
     return_type = Integer
@@ -223,6 +267,7 @@ class Avg(Analytic):
     """
     Average operator
     """
+
     op = AVG
     type_to_check = Number
     return_type = Number
@@ -233,6 +278,7 @@ class Median(Analytic):
     """
     Median operator
     """
+
     op = MEDIAN
     type_to_check = Number
     return_type = Number
@@ -243,6 +289,7 @@ class PopulationStandardDeviation(Analytic):
     """
     Population deviation operator
     """
+
     op = STDDEV_POP
     type_to_check = Number
     return_type = Number
@@ -253,6 +300,7 @@ class SampleStandardDeviation(Analytic):
     """
     Sample standard deviation operator.
     """
+
     op = STDDEV_SAMP
     type_to_check = Number
     return_type = Number
@@ -263,6 +311,7 @@ class PopulationVariance(Analytic):
     """
     Variance operator
     """
+
     op = VAR_POP
     type_to_check = Number
     return_type = Number
@@ -273,6 +322,7 @@ class SampleVariance(Analytic):
     """
     Sample variance operator
     """
+
     op = VAR_SAMP
     type_to_check = Number
     return_type = Number
@@ -283,6 +333,7 @@ class FirstValue(Analytic):
     """
     First value operator
     """
+
     op = FIRST_VALUE
     sql_op = "FIRST"
 
@@ -291,6 +342,7 @@ class LastValue(Analytic):
     """
     Last value operator
     """
+
     op = LAST_VALUE
     sql_op = "LAST"
 
@@ -299,6 +351,7 @@ class Lag(Analytic):
     """
     Lag operator
     """
+
     op = LAG
     sql_op = "LAG"
 
@@ -307,6 +360,7 @@ class Lead(Analytic):
     """
     Lead operator
     """
+
     op = LEAD
     sql_op = "LEAD"
 
@@ -315,6 +369,7 @@ class Rank(Analytic):
     """
     Rank operator
     """
+
     op = RANK
     sql_op = "RANK"
     return_type = Integer
@@ -324,6 +379,7 @@ class RatioToReport(Analytic):
     """
     Ratio operator
     """
+
     op = RATIO_TO_REPORT
     type_to_check = Number
     return_type = Number
vtlengine/Operators/Assignment.py
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Any
 
 from vtlengine.Operators import Binary
 
@@ -11,12 +11,15 @@ ALL_MODEL_TYPES = Union[DataComponent, Dataset]
 class Assignment(Binary):
 
     @classmethod
-    def validate(cls, left_operand: str, right_operand: ALL_MODEL_TYPES) -> ALL_MODEL_TYPES:
-        if isinstance(right_operand, DataComponent) and right_operand.role == "IDENTIFIER":
+    def validate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
+        if (
+            isinstance(right_operand, DataComponent)
+            and right_operand.role.__str__() == "IDENTIFIER"
+        ):
             raise SemanticError("1-1-6-13", op=cls.op, comp_name=right_operand.name)
         right_operand.name = left_operand
         return right_operand
 
     @classmethod
-    def evaluate(cls, left_operand: str, right_operand: ALL_MODEL_TYPES) -> ALL_MODEL_TYPES:
+    def evaluate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
         return cls.validate(left_operand, right_operand)
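The switch from right_operand.role == "IDENTIFIER" to right_operand.role.__str__() == "IDENTIFIER" suggests role is an enum-like object rather than a plain string: an Enum member never compares equal to a string. A hypothetical sketch of the distinction (this Role definition is illustrative, not the package's):

    from enum import Enum

    class Role(Enum):
        IDENTIFIER = "Identifier"

        def __str__(self) -> str:
            return self.name

    role = Role.IDENTIFIER
    print(role == "IDENTIFIER")        # False: Enum members never equal plain strings
    print(str(role) == "IDENTIFIER")   # True: comparison goes through __str__
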
vtlengine/Operators/Boolean.py
@@ -1,11 +1,8 @@
-import os
-
-import numba
-
-if os.environ.get("SPARK", False):
-    import pyspark.pandas as pd
-else:
-    import pandas as pd
+# if os.environ.get("SPARK", False):
+#     import pyspark.pandas as pd
+# else:
+#     import pandas as pd
+import pandas as pd
 
 from typing import Optional, Any
 
@@ -22,22 +19,20 @@ class Unary(Operator.Unary):
 class Binary(Operator.Binary):
     type_to_check = Boolean
     return_type = Boolean
-    comp_op = None
+    comp_op: Any = None
 
     @classmethod
-    def apply_operation_series_scalar(cls, series: pd.Series, scalar: Any,
-                                      series_left: bool) -> Any:
+    def apply_operation_series_scalar(cls, series: Any, scalar: Any, series_left: bool) -> Any:
         if series_left:
             return series.map(lambda x: cls.py_op(x, scalar))
         else:
             return series.map(lambda x: cls.py_op(scalar, x))
 
     @classmethod
-    def apply_operation_two_series(cls,
-                                   left_series: Any,
-                                   right_series: Any) -> Any:
-        result = cls.comp_op(left_series.astype('bool[pyarrow]'),
-                             right_series.astype('bool[pyarrow]'))
+    def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
+        result = cls.comp_op(
+            left_series.astype("bool[pyarrow]"), right_series.astype("bool[pyarrow]")
+        )
         return result.replace({pd.NA: None}).astype(object)
 
     @classmethod
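The vectorized path casts both operands to pandas' pyarrow-backed boolean dtype, applies the stored series operator, and normalizes missing values back to plain None. A minimal standalone round-trip (requires pandas with pyarrow installed; logical operators on arrow-backed booleans follow Kleene three-valued semantics):

    import pandas as pd

    left = pd.Series([True, None, False], dtype="bool[pyarrow]")
    right = pd.Series([True, True, None], dtype="bool[pyarrow]")

    result = left & right  # pd.Series.__and__, as stored in And.comp_op
    print(result.replace({pd.NA: None}).astype(object).tolist())
    # expected: [True, None, False] (None & True is unknown, False & None is False)
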
@@ -50,7 +45,7 @@ class And(Binary):
     comp_op = pd.Series.__and__
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
         if (x is None and y == False) or (x == False and y is None):
             return False
@@ -58,9 +53,9 @@ class And(Binary):
             return None
         return x and y
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x & y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x & y
 
 
 class Or(Binary):
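The scalar fallback py_op spells out the same three-valued AND. Reassembled from the two hunks above (the guard line between them is elided by the diff viewer and inferred here), it behaves as:

    from typing import Optional

    def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
        if (x is None and y == False) or (x == False and y is None):
            return False
        if x is None or y is None:  # inferred guard, not shown in the diff
            return None
        return x and y

    print(py_op(False, None), py_op(True, None), py_op(True, True))  # False None True
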
@@ -68,7 +63,7 @@ class Or(Binary):
     comp_op = pd.Series.__or__
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool], y: Optional[bool]) -> Optional[bool]:
         if (x is None and y == True) or (x == True and y is None):
             return True
@@ -76,9 +71,9 @@ class Or(Binary):
             return None
         return x or y
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x | y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x | y
 
 
 class Xor(Binary):
@@ -91,23 +86,23 @@ class Xor(Binary):
             return None
         return (x and not y) or (not x and y)
 
-    @classmethod
-    def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
-        return x ^ y
+    # @classmethod
+    # def spark_op(cls, x: pd.Series, y: pd.Series) -> pd.Series:
+    #     return x ^ y
 
 
 class Not(Unary):
     op = NOT
 
     @staticmethod
-    @numba.njit
+    # @numba.njit
     def py_op(x: Optional[bool]) -> Optional[bool]:
         return None if x is None else not x
 
-    @classmethod
-    def spark_op(cls, series: pd.Series) -> pd.Series:
-        return ~series
+    # @classmethod
+    # def spark_op(cls, series: pd.Series) -> pd.Series:
+    #     return ~series
 
     @classmethod
     def apply_operation_component(cls, series: Any) -> Any:
-        return series.map(lambda x: not x, na_action='ignore')
+        return series.map(lambda x: not x, na_action="ignore")
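The na_action="ignore" argument makes Series.map propagate missing values instead of passing them to the lambda. A standalone check:

    import pandas as pd

    s = pd.Series([True, None, False], dtype=object)
    print(s.map(lambda x: not x, na_action="ignore").tolist())
    # [False, None, True]: the missing entry passes through un-negated
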