vtlengine 1.0.3rc2-py3-none-any.whl → 1.0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vtlengine might be problematic.

Files changed (47)
  1. vtlengine/API/_InternalApi.py +55 -20
  2. vtlengine/API/__init__.py +11 -2
  3. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  4. vtlengine/AST/ASTConstructor.py +5 -4
  5. vtlengine/AST/ASTConstructorModules/Expr.py +47 -48
  6. vtlengine/AST/ASTConstructorModules/ExprComponents.py +45 -23
  7. vtlengine/AST/ASTConstructorModules/Terminals.py +21 -11
  8. vtlengine/AST/ASTEncoders.py +1 -1
  9. vtlengine/AST/DAG/__init__.py +0 -3
  10. vtlengine/AST/Grammar/lexer.py +0 -1
  11. vtlengine/AST/Grammar/parser.py +185 -440
  12. vtlengine/AST/VtlVisitor.py +0 -1
  13. vtlengine/DataTypes/TimeHandling.py +50 -15
  14. vtlengine/DataTypes/__init__.py +79 -7
  15. vtlengine/Exceptions/__init__.py +3 -5
  16. vtlengine/Exceptions/messages.py +65 -105
  17. vtlengine/Interpreter/__init__.py +83 -38
  18. vtlengine/Model/__init__.py +7 -9
  19. vtlengine/Operators/Aggregation.py +13 -7
  20. vtlengine/Operators/Analytic.py +48 -9
  21. vtlengine/Operators/Assignment.py +0 -1
  22. vtlengine/Operators/CastOperator.py +44 -44
  23. vtlengine/Operators/Clause.py +16 -10
  24. vtlengine/Operators/Comparison.py +20 -12
  25. vtlengine/Operators/Conditional.py +30 -13
  26. vtlengine/Operators/General.py +9 -4
  27. vtlengine/Operators/HROperators.py +4 -14
  28. vtlengine/Operators/Join.py +15 -14
  29. vtlengine/Operators/Numeric.py +32 -26
  30. vtlengine/Operators/RoleSetter.py +6 -2
  31. vtlengine/Operators/Set.py +12 -8
  32. vtlengine/Operators/String.py +9 -9
  33. vtlengine/Operators/Time.py +136 -116
  34. vtlengine/Operators/Validation.py +10 -4
  35. vtlengine/Operators/__init__.py +56 -69
  36. vtlengine/Utils/__init__.py +6 -1
  37. vtlengine/files/output/__init__.py +0 -1
  38. vtlengine/files/output/_time_period_representation.py +2 -1
  39. vtlengine/files/parser/__init__.py +44 -10
  40. vtlengine/files/parser/_rfc_dialect.py +1 -1
  41. vtlengine/files/parser/_time_checking.py +4 -4
  42. {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/METADATA +9 -7
  43. vtlengine-1.0.4.dist-info/RECORD +58 -0
  44. {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/WHEEL +1 -1
  45. vtlengine/DataTypes/NumericTypesHandling.py +0 -38
  46. vtlengine-1.0.3rc2.dist-info/RECORD +0 -58
  47. {vtlengine-1.0.3rc2.dist-info → vtlengine-1.0.4.dist-info}/LICENSE.md +0 -0
@@ -1,7 +1,7 @@
 from copy import copy, deepcopy
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

 import pandas as pd

@@ -68,10 +68,19 @@ from vtlengine.Operators.CastOperator import Cast
 from vtlengine.Operators.Comparison import Between, ExistIn
 from vtlengine.Operators.Conditional import Case, If
 from vtlengine.Operators.General import Eval
-from vtlengine.Operators.HROperators import HAAssignment, Hierarchy, get_measure_from_dataset
+from vtlengine.Operators.HROperators import (
+    HAAssignment,
+    Hierarchy,
+    get_measure_from_dataset,
+)
 from vtlengine.Operators.Numeric import Round, Trunc
 from vtlengine.Operators.String import Instr, Replace, Substr
-from vtlengine.Operators.Time import Current_Date, Date_Add, Fill_time_series, Time_Aggregation
+from vtlengine.Operators.Time import (
+    Current_Date,
+    Date_Add,
+    Fill_time_series,
+    Time_Aggregation,
+)
 from vtlengine.Operators.Validation import Check, Check_Datapoint, Check_Hierarchy
 from vtlengine.Utils import (
     AGGREGATION_MAPPING,
@@ -153,7 +162,9 @@ class InterpreterAnalyzer(ASTTemplate):
         for ds_name in self.ds_analysis[INSERT][statement_num]:
             if ds_name in self.datapoints_paths:
                 self.datasets[ds_name].data = load_datapoints(
-                    self.datasets[ds_name].components, ds_name, self.datapoints_paths[ds_name]
+                    self.datasets[ds_name].components,
+                    ds_name,
+                    self.datapoints_paths[ds_name],
                 )
             elif ds_name in self.datasets and self.datasets[ds_name].data is None:
                 _fill_dataset_empty_data(self.datasets[ds_name])
@@ -180,7 +191,9 @@ class InterpreterAnalyzer(ASTTemplate):

             # Saving only datasets, no scalars
             save_datapoints(
-                self.time_period_representation, self.datasets[ds_name], self.output_path
+                self.time_period_representation,
+                self.datasets[ds_name],
+                self.output_path,
             )
             self.datasets[ds_name].data = None

@@ -201,8 +214,9 @@ class InterpreterAnalyzer(ASTTemplate):
             if isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
                 vtlengine.Exceptions.dataset_output = child.left.value  # type: ignore[attr-defined]
                 self._load_datapoints_efficient(statement_num)
-            if (not isinstance(child, (AST.HRuleset, AST.DPRuleset, AST.Operator)) and
-                    not isinstance(child, (AST.Assignment, AST.PersistentAssignment))):
+            if not isinstance(
+                child, (AST.HRuleset, AST.DPRuleset, AST.Operator)
+            ) and not isinstance(child, (AST.Assignment, AST.PersistentAssignment)):
                 raise SemanticError("1-3-17")
             result = self.visit(child)

@@ -229,15 +243,14 @@ class InterpreterAnalyzer(ASTTemplate):
     # Definition Language

     def visit_Operator(self, node: AST.Operator) -> None:
-
         if self.udos is None:
             self.udos = {}
         elif node.op in self.udos:
             raise ValueError(f"User Defined Operator {node.op} already exists")

-        param_info = []
+        param_info: List[Dict[str, Union[str, Type[ScalarType], AST.AST]]] = []
         for param in node.parameters:
-            if param.name in param_info:
+            if param.name in [x["name"] for x in param_info]:
                 raise ValueError(f"Duplicated Parameter {param.name} in UDO {node.op}")
             # We use a string for model types, but the data type class for basic types
             # (Integer, Number, String, Boolean, ...)
@@ -262,7 +275,6 @@ class InterpreterAnalyzer(ASTTemplate):
         }

     def visit_DPRuleset(self, node: AST.DPRuleset) -> None:
-
         # Rule names are optional, if not provided, they are generated.
         # If provided, all must be provided
         rule_names = [rule.name for rule in node.rules if rule.name is not None]
@@ -358,7 +370,6 @@ class InterpreterAnalyzer(ASTTemplate):
         return self.visit_Assignment(node)

     def visit_BinOp(self, node: AST.BinOp) -> Any:
-
         is_from_if = False
         if (
             not self.is_from_condition
@@ -369,8 +380,12 @@ class InterpreterAnalyzer(ASTTemplate):
             is_from_if = self.is_from_if
             self.is_from_if = False

-        if (self.is_from_join and node.op in [MEMBERSHIP, AGGREGATE] and
-                hasattr(node.left, "value") and hasattr(node.right, "value")):
+        if (
+            self.is_from_join
+            and node.op in [MEMBERSHIP, AGGREGATE]
+            and hasattr(node.left, "value")
+            and hasattr(node.right, "value")
+        ):
             if self.udo_params is not None and node.right.value in self.udo_params[-1]:
                 comp_name = f"{node.left.value}#{self.udo_params[-1][node.right.value]}"
             else:
@@ -421,7 +436,10 @@ class InterpreterAnalyzer(ASTTemplate):
         if node.operand is not None and operand is not None:
             op_comp: DataComponent = self.visit(node.operand)
             comps_to_keep = {}
-            for comp_name, comp in self.regular_aggregation_dataset.components.items():
+            for (
+                comp_name,
+                comp,
+            ) in self.regular_aggregation_dataset.components.items():
                 if comp.role == Role.IDENTIFIER:
                     comps_to_keep[comp_name] = copy(comp)
             comps_to_keep[op_comp.name] = Component(
@@ -744,8 +762,12 @@ class InterpreterAnalyzer(ASTTemplate):
             is_partial_present = 0
             found_comp = None
             for comp_name in self.regular_aggregation_dataset.get_components_names():
-                if ("#" in comp_name and comp_name.split("#")[1] == node.value or "#"
-                        in node.value and node.value.split("#")[1] == comp_name):
+                if (
+                    "#" in comp_name
+                    and comp_name.split("#")[1] == node.value
+                    or "#" in node.value
+                    and node.value.split("#")[1] == comp_name
+                ):
                     is_partial_present += 1
                     found_comp = comp_name
             if is_partial_present == 0:
@@ -783,7 +805,9 @@ class InterpreterAnalyzer(ASTTemplate):
             comp_name = self.ruleset_signature[node.value]
             if comp_name not in self.ruleset_dataset.components:
                 raise SemanticError(
-                    "1-1-1-10", comp_name=node.value, dataset_name=self.ruleset_dataset.name
+                    "1-1-1-10",
+                    comp_name=node.value,
+                    dataset_name=self.ruleset_dataset.name,
                 )
             data = None if self.rule_data is None else self.rule_data[comp_name]
             return DataComponent(
@@ -938,7 +962,6 @@ class InterpreterAnalyzer(ASTTemplate):
         return REGULAR_AGGREGATION_MAPPING[node.op].analyze(operands, dataset)

     def visit_If(self, node: AST.If) -> Dataset:
-
         self.is_from_condition = True
         condition = self.visit(node.condition)
         self.is_from_condition = False
@@ -948,7 +971,10 @@ class InterpreterAnalyzer(ASTTemplate):
             elseValue = self.visit(node.elseOp)
             if not isinstance(thenValue, Scalar) or not isinstance(elseValue, Scalar):
                 raise SemanticError(
-                    "1-1-9-3", op="If_op", then_name=thenValue.name, else_name=elseValue.name
+                    "1-1-9-3",
+                    op="If_op",
+                    then_name=thenValue.name,
+                    else_name=elseValue.name,
                 )
             if condition.value:
                 return self.visit(node.thenOp)
@@ -1024,7 +1050,9 @@ class InterpreterAnalyzer(ASTTemplate):

     def visit_Constant(self, node: AST.Constant) -> Any:
         return Scalar(
-            name=str(node.value), value=node.value, data_type=BASIC_TYPES[type(node.value)]
+            name=str(node.value),
+            value=node.value,
+            data_type=BASIC_TYPES[type(node.value)],
         )

     def visit_JoinOp(self, node: AST.JoinOp) -> None:
@@ -1130,7 +1158,9 @@ class InterpreterAnalyzer(ASTTemplate):
             for comp_name in node.children[2:]:
                 if comp_name.__str__() not in dataset_element.components:
                     raise SemanticError(
-                        "1-1-1-10", comp_name=comp_name, dataset_name=dataset_element.name
+                        "1-1-1-10",
+                        comp_name=comp_name,
+                        dataset_name=dataset_element.name,
                     )
             if dpr_info is not None and dpr_info["signature_type"] == "variable":
                 for i, comp_name in enumerate(node.children[2:]):
@@ -1164,7 +1194,9 @@ class InterpreterAnalyzer(ASTTemplate):

             # Datapoint Ruleset final evaluation
             return Check_Datapoint.analyze(
-                dataset_element=dataset_element, rule_info=rule_output_values, output=output
+                dataset_element=dataset_element,
+                rule_info=rule_output_values,
+                output=output,
             )
         elif node.op in (CHECK_HIERARCHY, HIERARCHY):
             if len(node.children) == 3:
@@ -1203,7 +1235,10 @@ class InterpreterAnalyzer(ASTTemplate):
                     and hr_info["signature"] != component
                 ):
                     raise SemanticError(
-                        "1-1-10-3", op=node.op, found=component, expected=hr_info["signature"]
+                        "1-1-10-3",
+                        op=node.op,
+                        found=component,
+                        expected=hr_info["signature"],
                     )
                 elif hr_info["node"].signature_type == "valuedomain" and component is None:
                     raise SemanticError("1-1-10-4", op=node.op)
@@ -1215,7 +1250,10 @@ class InterpreterAnalyzer(ASTTemplate):
                         and cond_components[i] != cond_comp
                     ):
                         raise SemanticError(
-                            "1-1-10-6", op=node.op, expected=cond_comp, found=cond_components[i]
+                            "1-1-10-6",
+                            op=node.op,
+                            expected=cond_comp,
+                            found=cond_components[i],
                         )
                     cond_info[cond_comp] = cond_components[i]

@@ -1270,7 +1308,9 @@ class InterpreterAnalyzer(ASTTemplate):
             # Final evaluation
             if node.op == CHECK_HIERARCHY:
                 result = Check_Hierarchy.analyze(
-                    dataset_element=dataset, rule_info=rule_output_values, output=output
+                    dataset_element=dataset,
+                    rule_info=rule_output_values,
+                    output=output,
                 )
                 del rule_output_values
             else:
@@ -1396,10 +1436,12 @@ class InterpreterAnalyzer(ASTTemplate):
             left_operand.data = pd.DataFrame({measure_name: []})
         if right_operand.data is None:
             right_operand.data = pd.DataFrame({measure_name: []})
-        left_null_indexes = set(left_operand.data[left_operand.data[
-            measure_name].isnull()].index)
-        right_null_indexes = set(right_operand.data[right_operand.data[
-            measure_name].isnull()].index)
+        left_null_indexes = set(
+            left_operand.data[left_operand.data[measure_name].isnull()].index
+        )
+        right_null_indexes = set(
+            right_operand.data[right_operand.data[measure_name].isnull()].index
+        )
         # If no indexes are in common, then one datapoint is not null
         invalid_indexes = list(left_null_indexes.intersection(right_null_indexes))
         if len(invalid_indexes) > 0:
@@ -1415,7 +1457,6 @@ class InterpreterAnalyzer(ASTTemplate):
         return HR_UNARY_MAPPING[node.op].analyze(operand)

     def visit_Validation(self, node: AST.Validation) -> Dataset:
-
         validation_element = self.visit(node.validation)
         if not isinstance(validation_element, Dataset):
             raise ValueError(f"Expected dataset, got {type(validation_element).__name__}")
@@ -1532,7 +1573,10 @@ class InterpreterAnalyzer(ASTTemplate):
         components.update(
             {
                 name: Component(
-                    name=name, data_type=BASIC_TYPES[int], role=Role.MEASURE, nullable=True
+                    name=name,
+                    data_type=BASIC_TYPES[int],
+                    role=Role.MEASURE,
+                    nullable=True,
                 )
             }
         )
@@ -1555,7 +1599,7 @@ class InterpreterAnalyzer(ASTTemplate):
         )
         merge_index = merge_dataset.data[merge_dataset.get_measures_names()[0]].to_list()
         ids = merge_dataset.get_identifiers_names()
-        if isinstance(left_operand, Dataset | DataComponent):
+        if isinstance(left_operand, (Dataset, DataComponent)):
             if left_operand.data is None:
                 return left_operand, right_operand
             if isinstance(left_operand, Dataset):
@@ -1571,7 +1615,7 @@ class InterpreterAnalyzer(ASTTemplate):
             else:
                 left = left_operand.data
             left_operand.data = left.reindex(merge_index, fill_value=None)
-        if isinstance(right_operand, Dataset | DataComponent):
+        if isinstance(right_operand, (Dataset, DataComponent)):
             if right_operand.data is None:
                 return left_operand, right_operand
             if isinstance(right_operand, Dataset):
@@ -1737,9 +1781,7 @@ class InterpreterAnalyzer(ASTTemplate):
                 signature_values[param["name"]] = self.visit(node.params[i])
             elif param["type"] in ["Dataset", "Component"]:
                 if isinstance(node.params[i], AST.VarID):
-                    signature_values[param["name"]] = node.params[
-                        i
-                    ].value  # type: ignore[attr-defined]
+                    signature_values[param["name"]] = node.params[i].value  # type: ignore[attr-defined]
                 else:
                     param_element = self.visit(node.params[i])
                     if isinstance(param_element, Dataset):
@@ -1834,5 +1876,8 @@ class InterpreterAnalyzer(ASTTemplate):
         operand = self.visit(node.operand)

         return Time_Aggregation.analyze(
-            operand=operand, period_from=node.period_from, period_to=node.period_to, conf=node.conf
+            operand=operand,
+            period_from=node.period_from,
+            period_to=node.period_to,
+            conf=node.conf,
         )
@@ -229,23 +229,21 @@ class Dataset:
                 self.data[comp.name] = self.data[comp.name].astype(str)
                 other.data[comp.name] = other.data[comp.name].astype(str)
                 self.data[comp.name] = self.data[comp.name].map(
-                    lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
+                    lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                    na_action="ignore",
                 )
                 other.data[comp.name] = other.data[comp.name].map(
-                    lambda x: str(TimePeriodHandler(x)) if x != "" else "", na_action="ignore"
+                    lambda x: str(TimePeriodHandler(x)) if x != "" else "",
+                    na_action="ignore",
                 )
             elif type_name in ["Integer", "Number"]:
                 type_ = "int64" if type_name == "Integer" else "float32"
                 # We use here a number to avoid errors on equality on empty strings
                 self.data[comp.name] = (
-                    self.data[comp.name]
-                    .replace("", -1234997)
-                    .astype(type_)  # type: ignore[call-overload]
+                    self.data[comp.name].replace("", -1234997).astype(type_)  # type: ignore[call-overload]
                 )
                 other.data[comp.name] = (
-                    other.data[comp.name]
-                    .replace("", -1234997)
-                    .astype(type_)  # type: ignore[call-overload]
+                    other.data[comp.name].replace("", -1234997).astype(type_)  # type: ignore[call-overload]
                 )
         try:
             assert_frame_equal(
@@ -334,7 +332,7 @@
         return {
             "name": self.name,
             "components": {k: v.to_dict() for k, v in self.components.items()},
-            "data": self.data.to_dict(orient="records") if self.data is not None else None,
+            "data": (self.data.to_dict(orient="records") if self.data is not None else None),
         }

     def to_json(self) -> str:
@@ -29,8 +29,8 @@ from vtlengine.DataTypes import (
     unary_implicit_promotion,
 )
 from vtlengine.DataTypes.TimeHandling import (
-    DURATION_MAPPING,
-    DURATION_MAPPING_REVERSED,
+    PERIOD_IND_MAPPING,
+    PERIOD_IND_MAPPING_REVERSE,
     TimeIntervalHandler,
     TimePeriodHandler,
 )
@@ -89,7 +89,10 @@ class Aggregation(Operator.Unary):
                 data[measure.name] = (
                     data[measure.name]
                     .astype(object)
-                    .map(lambda x: TimeIntervalHandler.from_iso_format(x), na_action="ignore")
+                    .map(
+                        lambda x: TimeIntervalHandler.from_iso_format(x),
+                        na_action="ignore",
+                    )
                 )
             else:
                 data[measure.name] = data[measure.name].map(
@@ -100,11 +103,11 @@
         elif measure.data_type == Duration:
             if mode == "input":
                 data[measure.name] = data[measure.name].map(
-                    lambda x: DURATION_MAPPING[x], na_action="ignore"
+                    lambda x: PERIOD_IND_MAPPING[x], na_action="ignore"
                 )
             else:
                 data[measure.name] = data[measure.name].map(
-                    lambda x: DURATION_MAPPING_REVERSED[x], na_action="ignore"
+                    lambda x: PERIOD_IND_MAPPING_REVERSE[x], na_action="ignore"
                 )
         elif measure.data_type == Boolean:
             if mode == "result":
@@ -128,7 +131,10 @@
         for comp_name in grouping_columns:
             if comp_name not in operand.components:
                 raise SemanticError(
-                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                    "1-1-1-10",
+                    op=cls.op,
+                    comp_name=comp_name,
+                    dataset_name=operand.name,
                 )
             if operand.components[comp_name].role != Role.IDENTIFIER:
                 raise SemanticError(
@@ -223,7 +229,7 @@
         )

         try:
-            return duckdb.query(query).to_df()
+            return duckdb.query(query).to_df().astype(object)
         except RuntimeError as e:
             if "Conversion" in e.args[0]:
                 raise SemanticError("2-3-8", op=cls.op, msg=e.args[0].split(":")[-1])
@@ -29,10 +29,17 @@ from vtlengine.AST.Grammar.tokens import (
     VAR_POP,
     VAR_SAMP,
 )
-from vtlengine.DataTypes import COMP_NAME_MAPPING, Integer, Number, unary_implicit_promotion
+from vtlengine.DataTypes import (
+    COMP_NAME_MAPPING,
+    Integer,
+    Number,
+    unary_implicit_promotion,
+)
 from vtlengine.Exceptions import SemanticError
 from vtlengine.Model import Component, Dataset, Role

+return_integer_operators = [MAX, MIN, SUM]
+

 # noinspection PyMethodOverriding
 class Analytic(Operator.Unary):
@@ -47,10 +54,11 @@ class Analytic(Operator.Unary):
     Evaluate: Ensures the type of data is the correct one to perform the Analytic operators.
     """

+    return_integer = None
     sql_op: Optional[str] = None

     @classmethod
-    def validate(  # type: ignore[override]
+    def validate(  # type: ignore[override]  # noqa: C901
         cls,
         operand: Dataset,
         partitioning: List[str],
@@ -66,7 +74,10 @@
         for comp_name in partitioning:
             if comp_name not in operand.components:
                 raise SemanticError(
-                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                    "1-1-1-10",
+                    op=cls.op,
+                    comp_name=comp_name,
+                    dataset_name=operand.name,
                 )
             if comp_name not in identifier_names:
                 raise SemanticError(
@@ -78,14 +89,21 @@
         for comp_name in order_components:
             if comp_name not in operand.components:
                 raise SemanticError(
-                    "1-1-1-10", op=cls.op, comp_name=comp_name, dataset_name=operand.name
+                    "1-1-1-10",
+                    op=cls.op,
+                    comp_name=comp_name,
+                    dataset_name=operand.name,
                 )
         if component_name is not None:
             if cls.type_to_check is not None:
                 unary_implicit_promotion(
                     operand.components[component_name].data_type, cls.type_to_check
                 )
-            if cls.return_type is not None:
+
+            if cls.op in return_integer_operators:
+                cls.return_integer = isinstance(cls.return_type, Integer)
+
+            elif cls.return_type is not None:
                 result_components[component_name] = Component(
                     name=component_name,
                     data_type=cls.return_type,
@@ -106,14 +124,28 @@
         measures = operand.get_measures()
         if len(measures) == 0:
             raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
+
+        if cls.op in return_integer_operators:
+            isNumber = False
+            for measure in measures:
+                isNumber |= isinstance(measure.data_type, Number)
+            cls.return_integer = not isNumber
+
         if cls.type_to_check is not None:
             for measure in measures:
                 unary_implicit_promotion(measure.data_type, cls.type_to_check)
-        if cls.return_type is not None:
+
+        if cls.op in return_integer_operators:
+            for measure in measures:
+                new_measure = copy(measure)
+                new_measure.data_type = Integer if cls.return_integer else Number
+                result_components[measure.name] = new_measure
+        elif cls.return_type is not None:
             for measure in measures:
                 new_measure = copy(measure)
                 new_measure.data_type = cls.return_type
                 result_components[measure.name] = new_measure
+
         if cls.op == COUNT and len(measures) <= 1:
             measure_name = COMP_NAME_MAPPING[cls.return_type]
             nullable = False if len(measures) == 0 else measures[0].nullable
@@ -199,6 +231,8 @@
             measure_query = f"{cls.sql_op}({measure})"
             if cls.op == COUNT and len(measure_names) == 1:
                 measure_query += f" {analytic_str} as {COMP_NAME_MAPPING[cls.return_type]}"
+            elif cls.op in return_integer_operators and cls.return_integer:
+                measure_query = f"CAST({measure_query} {analytic_str} AS INTEGER) as {measure}"
             else:
                 measure_query += f" {analytic_str} as {measure}"
             measure_queries.append(measure_query)
@@ -215,7 +249,7 @@
         df[measure_names] = df[measure_names].fillna(-1)
         # if os.getenv("SPARK", False):
         #     df = df.to_pandas()
-        return duckdb.query(query).to_df()
+        return duckdb.query(query).to_df().astype(object)

     @classmethod
     def evaluate(  # type: ignore[override]
@@ -245,6 +279,10 @@
             window=window,
             params=params,
         )
+
+        # if cls.return_type == Integer:
+        #     result.data[measure_names] = result.data[measure_names].astype('Int64')
+
         return result


@@ -255,6 +293,7 @@ class Max(Analytic):

     op = MAX
     sql_op = "MAX"
+    return_integer = False


 class Min(Analytic):
@@ -264,6 +303,7 @@ class Min(Analytic):

     op = MIN
     sql_op = "MIN"
+    return_integer = False


 class Sum(Analytic):
@@ -272,9 +312,8 @@ class Sum(Analytic):
     """

     op = SUM
-    type_to_check = Number
-    return_type = Number
     sql_op = "SUM"
+    return_integer = False


 class Count(Analytic):
@@ -8,7 +8,6 @@ ALL_MODEL_TYPES = Union[DataComponent, Dataset]


 class Assignment(Binary):
-
     @classmethod
     def validate(cls, left_operand: Any, right_operand: Any) -> ALL_MODEL_TYPES:
         if (