vtlengine-1.0.3rc3-py3-none-any.whl → vtlengine-1.1rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vtlengine might be problematic.

Files changed (48)
  1. vtlengine/API/_InternalApi.py +64 -58
  2. vtlengine/API/__init__.py +11 -2
  3. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  4. vtlengine/AST/ASTConstructor.py +5 -4
  5. vtlengine/AST/ASTConstructorModules/Expr.py +47 -48
  6. vtlengine/AST/ASTConstructorModules/ExprComponents.py +45 -23
  7. vtlengine/AST/ASTConstructorModules/Terminals.py +21 -11
  8. vtlengine/AST/ASTEncoders.py +1 -1
  9. vtlengine/AST/DAG/__init__.py +0 -3
  10. vtlengine/AST/Grammar/lexer.py +0 -1
  11. vtlengine/AST/Grammar/parser.py +185 -440
  12. vtlengine/AST/VtlVisitor.py +0 -1
  13. vtlengine/DataTypes/TimeHandling.py +50 -15
  14. vtlengine/DataTypes/__init__.py +79 -7
  15. vtlengine/Exceptions/__init__.py +3 -5
  16. vtlengine/Exceptions/messages.py +65 -105
  17. vtlengine/Interpreter/__init__.py +83 -38
  18. vtlengine/Model/__init__.py +7 -9
  19. vtlengine/Operators/Aggregation.py +13 -7
  20. vtlengine/Operators/Analytic.py +48 -9
  21. vtlengine/Operators/Assignment.py +0 -1
  22. vtlengine/Operators/CastOperator.py +44 -44
  23. vtlengine/Operators/Clause.py +16 -10
  24. vtlengine/Operators/Comparison.py +20 -12
  25. vtlengine/Operators/Conditional.py +30 -13
  26. vtlengine/Operators/General.py +9 -4
  27. vtlengine/Operators/HROperators.py +4 -14
  28. vtlengine/Operators/Join.py +15 -14
  29. vtlengine/Operators/Numeric.py +32 -26
  30. vtlengine/Operators/RoleSetter.py +6 -2
  31. vtlengine/Operators/Set.py +12 -8
  32. vtlengine/Operators/String.py +9 -9
  33. vtlengine/Operators/Time.py +136 -116
  34. vtlengine/Operators/Validation.py +10 -4
  35. vtlengine/Operators/__init__.py +56 -69
  36. vtlengine/Utils/__init__.py +6 -1
  37. vtlengine/__extras_check.py +17 -0
  38. vtlengine/files/output/__init__.py +2 -1
  39. vtlengine/files/output/_time_period_representation.py +2 -1
  40. vtlengine/files/parser/__init__.py +47 -31
  41. vtlengine/files/parser/_rfc_dialect.py +1 -1
  42. vtlengine/files/parser/_time_checking.py +4 -4
  43. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1rc1.dist-info}/METADATA +17 -17
  44. vtlengine-1.1rc1.dist-info/RECORD +59 -0
  45. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1rc1.dist-info}/WHEEL +1 -1
  46. vtlengine/DataTypes/NumericTypesHandling.py +0 -38
  47. vtlengine-1.0.3rc3.dist-info/RECORD +0 -58
  48. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1rc1.dist-info}/LICENSE.md +0 -0

vtlengine/Operators/__init__.py

@@ -8,7 +8,20 @@ from typing import Any, Optional, Union
  # import pandas as pd
  import pandas as pd
 
- from vtlengine.AST.Grammar.tokens import AND, CEIL, EQ, FLOOR, GT, GTE, LT, LTE, NEQ, OR, ROUND, XOR
+ from vtlengine.AST.Grammar.tokens import (
+     AND,
+     CEIL,
+     EQ,
+     FLOOR,
+     GT,
+     GTE,
+     LT,
+     LTE,
+     NEQ,
+     OR,
+     ROUND,
+     XOR,
+ )
  from vtlengine.DataTypes import (
      COMP_NAME_MAPPING,
      SCALAR_TYPES_CLASS_REVERSE,
@@ -18,7 +31,7 @@ from vtlengine.DataTypes import (
      unary_implicit_promotion,
  )
  from vtlengine.DataTypes.TimeHandling import (
-     DURATION_MAPPING,
+     PERIOD_IND_MAPPING,
      TimeIntervalHandler,
      TimePeriodHandler,
  )
@@ -54,7 +67,6 @@ class Operator:
 
      @classmethod
      def cast_time_types(cls, data_type: Any, series: Any) -> Any:
-
          if cls.op not in BINARY_COMPARISON_OPERATORS:
              return series
          if data_type.__name__ == "TimeInterval":
@@ -64,7 +76,7 @@
          elif data_type.__name__ == "TimePeriod":
              series = series.map(lambda x: TimePeriodHandler(x), na_action="ignore")
          elif data_type.__name__ == "Duration":
-             series = series.map(lambda x: DURATION_MAPPING[x], na_action="ignore")
+             series = series.map(lambda x: PERIOD_IND_MAPPING[x], na_action="ignore")
          return series
 
      @classmethod
@@ -76,9 +88,9 @@
          elif data_type.__name__ == "TimePeriod":
              return TimePeriodHandler(value)
          elif data_type.__name__ == "Duration":
-             if value not in DURATION_MAPPING:
+             if value not in PERIOD_IND_MAPPING:
                  raise Exception(f"Duration {value} is not valid")
-             return DURATION_MAPPING[value]
+             return PERIOD_IND_MAPPING[value]
          return value
 
      @classmethod
@@ -165,11 +177,11 @@
 
 
  def _id_type_promotion_join_keys(
-         c_left: Component,
-         c_right: Component,
-         join_key: str,
-         left_data: Optional[pd.DataFrame] = None,
-         right_data: Optional[pd.DataFrame] = None,
+     c_left: Component,
+     c_right: Component,
+     join_key: str,
+     left_data: Optional[pd.DataFrame] = None,
+     right_data: Optional[pd.DataFrame] = None,
  ) -> None:
      if left_data is None:
          left_data = pd.DataFrame()
@@ -184,7 +196,7 @@ def _id_type_promotion_join_keys(
          right_data[join_key] = right_data[join_key].astype(object)
          return
      if (left_type_name == "Integer" and right_type_name == "Number") or (
-             left_type_name == "Number" and right_type_name == "Integer"
+         left_type_name == "Number" and right_type_name == "Integer"
      ):
          left_data[join_key] = left_data[join_key].map(lambda x: int(float(x)))
          right_data[join_key] = right_data[join_key].map(lambda x: int(float(x)))
@@ -209,7 +221,6 @@ def _handle_str_number(x: Union[str, int, float]) -> Union[str, int, float]:
 
 
  class Binary(Operator):
-
      @classmethod
      def op_func(cls, *args: Any) -> Any:
          x, y = args
@@ -220,7 +231,6 @@ class Binary(Operator):
 
      @classmethod
      def apply_operation_two_series(cls, left_series: Any, right_series: Any) -> Any:
-
          if os.getenv("SPARK", False):
              if cls.spark_op is None:
                  cls.spark_op = cls.py_op
@@ -234,12 +244,11 @@ class Binary(Operator):
 
      @classmethod
      def apply_operation_series_scalar(
-             cls,
-             series: Any,
-             scalar: Scalar,
-             series_left: bool,
+         cls,
+         series: Any,
+         scalar: Scalar,
+         series_left: bool,
      ) -> Any:
-
          if scalar is None:
              return pd.Series(None, index=series.index)
          if series_left:
@@ -280,7 +289,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_validation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
-
          left_identifiers = left_operand.get_identifiers_names()
          right_identifiers = right_operand.get_identifiers_names()
 
@@ -293,7 +301,10 @@ class Binary(Operator):
 
          if left_measures_names != right_measures_names:
              raise SemanticError(
-                 "1-1-14-1", op=cls.op, left=left_measures_names, right=right_measures_names
+                 "1-1-14-1",
+                 op=cls.op,
+                 left=left_measures_names,
+                 right=right_measures_names,
              )
          elif len(left_measures) == 0:
              raise SemanticError("1-1-1-8", op=cls.op, name=left_operand.name)
@@ -331,7 +342,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_scalar_validation(cls, dataset: Dataset, scalar: Scalar) -> Dataset:
-
          if len(dataset.get_measures()) == 0:
              raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
 
@@ -346,11 +356,12 @@ class Binary(Operator):
 
      @classmethod
      def scalar_validation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
-
          if not cls.validate_type_compatibility(left_operand.data_type, right_operand.data_type):
              raise SemanticError(
-                 "1-1-1-2", type_1=left_operand.data_type, type_2=right_operand.data_type,
-                 type_check=cls.type_to_check
+                 "1-1-1-2",
+                 type_1=left_operand.data_type,
+                 type_2=right_operand.data_type,
+                 type_check=cls.type_to_check,
              )
          return Scalar(
              name="result",
@@ -360,7 +371,7 @@ class Binary(Operator):
 
      @classmethod
      def component_validation(
-             cls, left_operand: DataComponent, right_operand: DataComponent
+         cls, left_operand: DataComponent, right_operand: DataComponent
      ) -> DataComponent:
          """
          Validates the compatibility between the types of the components and the operator
@@ -382,7 +393,6 @@ class Binary(Operator):
 
      @classmethod
      def component_scalar_validation(cls, component: DataComponent, scalar: Scalar) -> DataComponent:
-
          cls.type_validation(component.data_type, scalar.data_type)
          result = DataComponent(
              name=component.name,
@@ -395,7 +405,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_set_validation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
-
          if len(dataset.get_measures()) == 0:
              raise SemanticError("1-1-1-8", op=cls.op, name=dataset.name)
          for measure in dataset.get_measures():
@@ -412,9 +421,8 @@ class Binary(Operator):
 
      @classmethod
      def component_set_validation(
-             cls, component: DataComponent, scalar_set: ScalarSet
+         cls, component: DataComponent, scalar_set: ScalarSet
      ) -> DataComponent:
-
          cls.type_validation(component.data_type, scalar_set.data_type)
          result = DataComponent(
              name="result",
@@ -427,7 +435,6 @@ class Binary(Operator):
 
      @classmethod
      def scalar_set_validation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
-
          cls.type_validation(scalar.data_type, scalar_set.data_type)
          return Scalar(
              name="result",
@@ -468,7 +475,7 @@ class Binary(Operator):
 
      @classmethod
      def apply_return_type_dataset(
-             cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
+         cls, result_dataset: Dataset, left_operand: Any, right_operand: Any
      ) -> None:
          """
          Used in dataset's validation.
@@ -498,9 +505,9 @@ class Binary(Operator):
              if result_dataset.data is not None:
                  result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
          elif (
-                 changed_allowed is False
-                 and is_mono_measure is False
-                 and left_type.promotion_changed_type(result_data_type)
+             changed_allowed is False
+             and is_mono_measure is False
+             and left_type.promotion_changed_type(result_data_type)
          ):
              raise SemanticError("1-1-1-4", op=cls.op)
          else:
@@ -508,7 +515,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_evaluation(cls, left_operand: Dataset, right_operand: Dataset) -> Dataset:
-
          result_dataset = cls.dataset_validation(left_operand, right_operand)
 
          use_right_as_base = False
@@ -587,16 +593,14 @@ class Binary(Operator):
 
      @classmethod
      def scalar_evaluation(cls, left_operand: Scalar, right_operand: Scalar) -> Scalar:
-
          result_scalar = cls.scalar_validation(left_operand, right_operand)
          result_scalar.value = cls.op_func(left_operand.value, right_operand.value)
          return result_scalar
 
      @classmethod
      def dataset_scalar_evaluation(
-             cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
+         cls, dataset: Dataset, scalar: Scalar, dataset_left: bool = True
      ) -> Dataset:
-
          result_dataset = cls.dataset_scalar_validation(dataset, scalar)
          result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
          result_dataset.data = result_data
@@ -606,9 +610,9 @@ class Binary(Operator):
          for measure in dataset.get_measures():
              measure_data = cls.cast_time_types(measure.data_type, result_data[measure.name].copy())
              if measure.data_type.__name__.__str__() == "Duration" and not isinstance(
-                     scalar_value, int
+                 scalar_value, int
              ):
-                 scalar_value = DURATION_MAPPING[scalar_value]
+                 scalar_value = PERIOD_IND_MAPPING[scalar_value]
              result_dataset.data[measure.name] = cls.apply_operation_series_scalar(
                  measure_data, scalar_value, dataset_left
              )
@@ -621,9 +625,8 @@ class Binary(Operator):
 
      @classmethod
      def component_evaluation(
-             cls, left_operand: DataComponent, right_operand: DataComponent
+         cls, left_operand: DataComponent, right_operand: DataComponent
      ) -> DataComponent:
-
          result_component = cls.component_validation(left_operand, right_operand)
          left_data = cls.cast_time_types(
              left_operand.data_type,
@@ -631,16 +634,15 @@ class Binary(Operator):
          )
          right_data = cls.cast_time_types(
              right_operand.data_type,
-             right_operand.data.copy() if right_operand.data is not None else pd.Series(),
+             (right_operand.data.copy() if right_operand.data is not None else pd.Series()),
          )
          result_component.data = cls.apply_operation_two_series(left_data, right_data)
          return result_component
 
      @classmethod
      def component_scalar_evaluation(
-             cls, component: DataComponent, scalar: Scalar, component_left: bool = True
+         cls, component: DataComponent, scalar: Scalar, component_left: bool = True
      ) -> DataComponent:
-
          result_component = cls.component_scalar_validation(component, scalar)
          comp_data = cls.cast_time_types(
              component.data_type,
@@ -648,9 +650,9 @@ class Binary(Operator):
          )
          scalar_value = cls.cast_time_types_scalar(scalar.data_type, scalar.value)
          if component.data_type.__name__.__str__() == "Duration" and not isinstance(
-                 scalar_value, int
+             scalar_value, int
          ):
-             scalar_value = DURATION_MAPPING[scalar_value]
+             scalar_value = PERIOD_IND_MAPPING[scalar_value]
          result_component.data = cls.apply_operation_series_scalar(
              comp_data, scalar_value, component_left
          )
@@ -658,7 +660,6 @@ class Binary(Operator):
 
      @classmethod
      def dataset_set_evaluation(cls, dataset: Dataset, scalar_set: ScalarSet) -> Dataset:
-
          result_dataset = cls.dataset_set_validation(dataset, scalar_set)
          result_data = dataset.data.copy() if dataset.data is not None else pd.DataFrame()
 
@@ -676,18 +677,17 @@ class Binary(Operator):
 
      @classmethod
      def component_set_evaluation(
-             cls, component: DataComponent, scalar_set: ScalarSet
+         cls, component: DataComponent, scalar_set: ScalarSet
      ) -> DataComponent:
-
          result_component = cls.component_set_validation(component, scalar_set)
          result_component.data = cls.apply_operation_two_series(
-             component.data.copy() if component.data is not None else pd.Series(), scalar_set
+             component.data.copy() if component.data is not None else pd.Series(),
+             scalar_set,
          )
          return result_component
 
      @classmethod
      def scalar_set_evaluation(cls, scalar: Scalar, scalar_set: ScalarSet) -> Scalar:
-
          result_scalar = cls.scalar_set_validation(scalar, scalar_set)
          result_scalar.value = cls.op_func(scalar.value, scalar_set)
          return result_scalar
@@ -726,7 +726,6 @@ class Binary(Operator):
 
 
  class Unary(Operator):
-
      @classmethod
      def op_func(cls, *args: Any) -> Any:
          x = args[0]
@@ -758,7 +757,6 @@ class Unary(Operator):
 
      @classmethod
      def dataset_validation(cls, operand: Dataset) -> Dataset:
-
          cls.validate_dataset_type(operand)
          if len(operand.get_measures()) == 0:
              raise SemanticError("1-1-1-8", op=cls.op, name=operand.name)
@@ -774,14 +772,12 @@ class Unary(Operator):
 
      @classmethod
      def scalar_validation(cls, operand: Scalar) -> Scalar:
-
          result_type = cls.type_validation(operand.data_type)
          result = Scalar(name="result", data_type=result_type, value=None)
          return result
 
      @classmethod
      def component_validation(cls, operand: DataComponent) -> DataComponent:
-
          result_type = cls.type_validation(operand.data_type)
          result = DataComponent(
              name="result",
@@ -795,18 +791,15 @@ class Unary(Operator):
      # The following class method implements the type promotion
      @classmethod
      def type_validation(cls, operand: Any) -> Any:
-
          return unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
 
      # The following class method checks the type promotion
      @classmethod
      def validate_type_compatibility(cls, operand: Any) -> bool:
-
          return check_unary_implicit_promotion(operand, cls.type_to_check, cls.return_type)
 
      @classmethod
      def validate_dataset_type(cls, dataset: Dataset) -> None:
-
          if cls.type_to_check is not None:
              for measure in dataset.get_measures():
                  if not cls.validate_type_compatibility(measure.data_type):
@@ -820,7 +813,6 @@ class Unary(Operator):
 
      @classmethod
      def validate_scalar_type(cls, scalar: Scalar) -> None:
-
          if cls.type_to_check is not None and not cls.validate_type_compatibility(scalar.data_type):
              raise SemanticError(
                  "1-1-1-5",
@@ -831,7 +823,6 @@ class Unary(Operator):
 
      @classmethod
      def apply_return_type_dataset(cls, result_dataset: Dataset, operand: Dataset) -> None:
-
          changed_allowed = cls.op in MONOMEASURE_CHANGED_ALLOWED
          is_mono_measure = len(operand.get_measures()) == 1
          for measure in result_dataset.get_measures():
@@ -850,9 +841,9 @@ class Unary(Operator):
              if result_dataset.data is not None:
                  result_dataset.data.rename(columns={measure.name: component.name}, inplace=True)
          elif (
-                 changed_allowed is False
-                 and is_mono_measure is False
-                 and operand_type.promotion_changed_type(result_data_type)
+             changed_allowed is False
+             and is_mono_measure is False
+             and operand_type.promotion_changed_type(result_data_type)
          ):
              raise SemanticError("1-1-1-4", op=cls.op)
          else:
@@ -860,7 +851,6 @@ class Unary(Operator):
 
      @classmethod
      def evaluate(cls, operand: ALL_MODEL_DATA_TYPES) -> Any:
-
          if isinstance(operand, Dataset):
              return cls.dataset_evaluation(operand)
          if isinstance(operand, Scalar):
@@ -870,7 +860,6 @@ class Unary(Operator):
 
      @classmethod
      def dataset_evaluation(cls, operand: Dataset) -> Dataset:
-
          result_dataset = cls.dataset_validation(operand)
          result_data = operand.data.copy() if operand.data is not None else pd.DataFrame()
          for measure_name in operand.get_measures_names():
@@ -885,14 +874,12 @@ class Unary(Operator):
 
      @classmethod
      def scalar_evaluation(cls, operand: Scalar) -> Scalar:
-
          result_scalar = cls.scalar_validation(operand)
          result_scalar.value = cls.op_func(operand.value)
          return result_scalar
 
      @classmethod
      def component_evaluation(cls, operand: DataComponent) -> DataComponent:
-
          result_component = cls.component_validation(operand)
          result_component.data = cls.apply_operation_component(
              operand.data.copy() if operand.data is not None else pd.Series()

vtlengine/Utils/__init__.py

@@ -339,7 +339,12 @@ REGULAR_AGGREGATION_MAPPING = {
      APPLY: Apply,
  }
 
- SET_MAPPING = {UNION: Union, INTERSECT: Intersection, SYMDIFF: Symdiff, SETDIFF: Setdiff}
+ SET_MAPPING = {
+     UNION: Union,
+     INTERSECT: Intersection,
+     SYMDIFF: Symdiff,
+     SETDIFF: Setdiff,
+ }
 
  AGGREGATION_MAPPING = {
      MAX: Max,

vtlengine/__extras_check.py (new file)

@@ -0,0 +1,17 @@
+ import importlib.util
+
+ EXTRAS_DOCS = "https://docs.vtlengine.meaningfuldata.eu/#installation"
+ ERROR_MESSAGE = (
+     "The '{extra_name}' extra is required to run {extra_desc}. "
+     "Please install it using 'pip install vtlengine[{extra_name}]' or "
+     "install all extras with 'pip install vtlengine[all]'. "
+     f"Check the documentation at: {EXTRAS_DOCS}"
+ )
+
+
+ def __check_s3_extra() -> None:
+     package_loc = importlib.util.find_spec("s3fs")
+     if package_loc is None:
+         raise ImportError(
+             ERROR_MESSAGE.format(extra_name="s3", extra_desc="over csv files using S3 URIs")
+         ) from None
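
The new module is a lazy guard for optional dependencies: nothing is checked at import time, and the "s3" extra is only verified when an S3-style path is actually used, as the hunks to vtlengine/files/output/__init__.py just below show. A minimal sketch of the calling pattern, assuming only what the diff shows (the wrapper function here is hypothetical):

    # Hypothetical caller illustrating the extras-guard pattern;
    # __check_s3_extra itself comes from the new module above.
    from vtlengine.__extras_check import __check_s3_extra

    def write_csv_anywhere(uri: str) -> None:
        # Raises ImportError with install instructions if s3fs is missing;
        # callers passing local Path objects never reach this check.
        __check_s3_extra()
        ...  # proceed with pandas / s3fs I/O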

vtlengine/files/output/__init__.py

@@ -3,6 +3,7 @@ from typing import Optional, Union
 
  import pandas as pd
 
+ from vtlengine.__extras_check import __check_s3_extra
  from vtlengine.files.output._time_period_representation import (
      TimePeriodRepresentation,
      format_time_period_external_representation,
@@ -15,12 +16,12 @@ def save_datapoints(
      dataset: Dataset,
      output_path: Union[str, Path],
  ) -> None:
-
      if dataset.data is None:
          dataset.data = pd.DataFrame()
      if time_period_representation is not None:
          format_time_period_external_representation(dataset, time_period_representation)
      if isinstance(output_path, str):
+         __check_s3_extra()
          if output_path.endswith("/"):
              s3_file_output = output_path + f"{dataset.name}.csv"
          else:

vtlengine/files/output/_time_period_representation.py

@@ -1,4 +1,5 @@
  from enum import Enum
+ from typing import Union
 
  from vtlengine.DataTypes import TimePeriod
  from vtlengine.DataTypes.TimeHandling import TimePeriodHandler
@@ -23,7 +24,7 @@ def _format_vtl_representation(value: str) -> str:
 
 
  def format_time_period_external_representation(
-     dataset: Dataset | Scalar, mode: TimePeriodRepresentation
+     dataset: Union[Dataset, Scalar], mode: TimePeriodRepresentation
  ) -> None:
      """
      From SDMX time period representation to standard VTL representation (no hyphen).

vtlengine/files/parser/__init__.py

@@ -17,10 +17,14 @@ from vtlengine.DataTypes import (
      TimeInterval,
      TimePeriod,
  )
- from vtlengine.DataTypes.TimeHandling import DURATION_MAPPING
+ from vtlengine.DataTypes.TimeHandling import PERIOD_IND_MAPPING
  from vtlengine.Exceptions import InputValidationException, SemanticError
  from vtlengine.files.parser._rfc_dialect import register_rfc
- from vtlengine.files.parser._time_checking import check_date, check_time, check_time_period
+ from vtlengine.files.parser._time_checking import (
+     check_date,
+     check_time,
+     check_time_period,
+ )
  from vtlengine.Model import Component, Dataset, Role
 
  TIME_CHECKS_MAPPING: Dict[Type[ScalarType], Any] = {
@@ -73,8 +77,11 @@ def _sanitize_pandas_columns(
      components: Dict[str, Component], csv_path: Union[str, Path], data: pd.DataFrame
  ) -> pd.DataFrame:
      # Fast loading from SDMX-CSV
-     if ("DATAFLOW" in data.columns and data.columns[0] == "DATAFLOW" and
-             "DATAFLOW" not in components):
+     if (
+         "DATAFLOW" in data.columns
+         and data.columns[0] == "DATAFLOW"
+         and "DATAFLOW" not in components
+     ):
          data.drop(columns=["DATAFLOW"], inplace=True)
      if "STRUCTURE" in data.columns and data.columns[0] == "STRUCTURE":
          if "STRUCTURE" not in components:
@@ -102,32 +109,23 @@
      return data
 
 
- def _pandas_load_csv(components: Dict[str, Component], csv_path: Path) -> pd.DataFrame:
+ def _pandas_load_csv(components: Dict[str, Component], csv_path: Union[str, Path]) -> pd.DataFrame:
      obj_dtypes = {comp_name: np.object_ for comp_name, comp in components.items()}
 
      try:
          data = pd.read_csv(
-             csv_path, dtype=obj_dtypes, engine="c", keep_default_na=False, na_values=[""]
+             csv_path,
+             dtype=obj_dtypes,
+             engine="c",
+             keep_default_na=False,
+             na_values=[""],
          )
      except UnicodeDecodeError:
-         raise InputValidationException(code="0-1-2-5", file=csv_path.name)
-
-     return _sanitize_pandas_columns(components, csv_path, data)
-
-
- def _pandas_load_s3_csv(components: Dict[str, Component], csv_path: str) -> pd.DataFrame:
-     obj_dtypes = {comp_name: np.object_ for comp_name, comp in components.items()}
-
-     # start = time()
-     try:
-         data = pd.read_csv(
-             csv_path, dtype=obj_dtypes, engine="c", keep_default_na=False, na_values=[""]
-         )
+         if isinstance(csv_path, Path):
+             raise InputValidationException(code="0-1-2-5", file=csv_path.name)
+         else:
+             raise InputValidationException(code="0-1-2-5", file=csv_path)
 
-     except UnicodeDecodeError:
-         raise InputValidationException(code="0-1-2-5", file=csv_path)
-     except Exception as e:
-         raise InputValidationException(f"ERROR: {str(e)}, review file {str(csv_path)}")
      return _sanitize_pandas_columns(components, csv_path, data)
 
 
@@ -165,7 +163,6 @@ def _validate_pandas(
      comp_name = ""
      comp = None
      try:
-
          for comp_name, comp in components.items():
              if comp.data_type in (Date, TimePeriod, TimeInterval):
                  data[comp_name] = data[comp_name].map(
@@ -184,16 +181,34 @@ def _validate_pandas(
              elif comp.data_type == Duration:
                  values_correct = (
                      data[comp_name]
-                     .map(lambda x: x.replace(" ", "") in DURATION_MAPPING, na_action="ignore")
+                     .map(
+                         lambda x: Duration.validate_duration(x),
+                         na_action="ignore",
+                     )
                      .all()
                  )
                  if not values_correct:
-                     raise ValueError(f"Duration values are not correct in column {comp_name}")
+                     try:
+                         values_correct = (
+                             data[comp_name]
+                             .map(
+                                 lambda x: x.replace(" ", "") in PERIOD_IND_MAPPING,
+                                 na_action="ignore",
+                             )
+                             .all()
+                         )
+                         if not values_correct:
+                             raise ValueError(
+                                 f"Duration values are not correct in column {comp_name}"
+                             )
+                     except ValueError:
+                         raise ValueError(f"Duration values are not correct in column {comp_name}")
              else:
                  data[comp_name] = data[comp_name].map(
                      lambda x: str(x).replace('"', ""), na_action="ignore"
                  )
                  data[comp_name] = data[comp_name].astype(np.object_, errors="raise")
+
      except ValueError:
          str_comp = SCALAR_TYPES_CLASS_REVERSE[comp.data_type] if comp else "Null"
          raise SemanticError("0-1-1-12", name=dataset_name, column=comp_name, type=str_comp)
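
Read per value, the new Duration check accepts two spellings: a value passes if Duration.validate_duration accepts it, and the column is otherwise re-checked against the period indicators in PERIOD_IND_MAPPING. A condensed per-value paraphrase (hypothetical helper; the real check above is column-wide, so a column mixing both spellings can still fail):

    # Hypothetical per-value condensation of the column-wide Duration check above.
    def _duration_value_ok(x: str) -> bool:
        try:
            if Duration.validate_duration(x):  # preferred representation
                return True
        except ValueError:
            pass
        # Fallback: bare period-indicator spelling, i.e. a key of PERIOD_IND_MAPPING.
        return x.replace(" ", "") in PERIOD_IND_MAPPING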
@@ -202,14 +217,15 @@ def _validate_pandas(
 
 
  def load_datapoints(
-     components: Dict[str, Component], dataset_name: str, csv_path: Optional[Union[Path, str]] = None
+     components: Dict[str, Component],
+     dataset_name: str,
+     csv_path: Optional[Union[Path, str]] = None,
  ) -> pd.DataFrame:
      if csv_path is None or (isinstance(csv_path, Path) and not csv_path.exists()):
          return pd.DataFrame(columns=list(components.keys()))
-     elif isinstance(csv_path, str):
-         data = _pandas_load_s3_csv(components, csv_path)
-     elif isinstance(csv_path, Path):
-         _validate_csv_path(components, csv_path)
+     elif isinstance(csv_path, (str, Path)):
+         if isinstance(csv_path, Path):
+             _validate_csv_path(components, csv_path)
          data = _pandas_load_csv(components, csv_path)
      else:
          raise Exception("Invalid csv_path type")
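
Net effect of the loader changes: _pandas_load_s3_csv is gone, and a single _pandas_load_csv handles both local Path objects and URI strings (pandas hands s3:// URLs to s3fs when it is installed). A usage sketch of the consolidated entry point, with illustrative dataset and path values (not taken from the diff):

    from pathlib import Path

    # Local file: the csv path is validated first, then read with the C engine.
    df_local = load_datapoints(components, "DS_1", Path("data/DS_1.csv"))

    # S3 URI string: skips the filesystem validation and relies on pandas + s3fs.
    df_remote = load_datapoints(components, "DS_1", "s3://my-bucket/DS_1.csv")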

vtlengine/files/parser/_rfc_dialect.py

@@ -19,4 +19,4 @@ class RFCDialect(csv.Dialect):
 
  def register_rfc() -> None:
      """Register the RFC dialect."""
-     csv.register_dialect("rfc", RFCDialect)
+     csv.register_dialect("rfc", RFCDialect)  # type: ignore[arg-type]

vtlengine/files/parser/_time_checking.py

@@ -21,16 +21,16 @@ def check_date(value: str) -> str:
              raise InputValidationException(f"Date {value} is out of range for the month.")
          if "month must be in 1..12" in str(e):
              raise InputValidationException(
-                 f"Date {value} is invalid. " f"Month must be between 1 and 12."
+                 f"Date {value} is invalid. Month must be between 1 and 12."
              )
          raise InputValidationException(
-             f"Date {value} is not in the correct format. " f"Use YYYY-MM-DD."
+             f"Date {value} is not in the correct format. Use YYYY-MM-DD."
          )
 
      # Check date is between 1900 and 9999
      if not 1800 <= date_value.year <= 9999:
          raise InputValidationException(
-             f"Date {value} is invalid. " f"Year must be between 1900 and 9999."
+             f"Date {value} is invalid. Year must be between 1900 and 9999."
          )
 
      return date_value.isoformat()
@@ -68,7 +68,7 @@ def check_time(value: str) -> str:
              raise ValueError("Start date is greater than end date.")
          return value
      raise ValueError(
-         "Time is not in the correct format. " "Use YYYY-MM-DD/YYYY-MM-DD or YYYY or YYYY-MM."
+         "Time is not in the correct format. Use YYYY-MM-DD/YYYY-MM-DD or YYYY or YYYY-MM."
      )
 
 