vtlengine 1.0.3rc3__py3-none-any.whl → 1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vtlengine might be problematic.

Files changed (53)
  1. vtlengine/API/_InternalApi.py +288 -61
  2. vtlengine/API/__init__.py +269 -71
  3. vtlengine/API/data/schema/json_schema_2.1.json +116 -0
  4. vtlengine/AST/ASTComment.py +56 -0
  5. vtlengine/AST/ASTConstructor.py +76 -22
  6. vtlengine/AST/ASTConstructorModules/Expr.py +238 -120
  7. vtlengine/AST/ASTConstructorModules/ExprComponents.py +126 -61
  8. vtlengine/AST/ASTConstructorModules/Terminals.py +97 -42
  9. vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  10. vtlengine/AST/ASTEncoders.py +5 -1
  11. vtlengine/AST/ASTString.py +608 -0
  12. vtlengine/AST/ASTTemplate.py +28 -2
  13. vtlengine/AST/DAG/__init__.py +10 -4
  14. vtlengine/AST/Grammar/lexer.py +0 -1
  15. vtlengine/AST/Grammar/parser.py +185 -440
  16. vtlengine/AST/VtlVisitor.py +0 -1
  17. vtlengine/AST/__init__.py +127 -14
  18. vtlengine/DataTypes/TimeHandling.py +50 -15
  19. vtlengine/DataTypes/__init__.py +79 -7
  20. vtlengine/Exceptions/__init__.py +3 -5
  21. vtlengine/Exceptions/messages.py +74 -105
  22. vtlengine/Interpreter/__init__.py +136 -46
  23. vtlengine/Model/__init__.py +14 -11
  24. vtlengine/Operators/Aggregation.py +17 -9
  25. vtlengine/Operators/Analytic.py +64 -20
  26. vtlengine/Operators/Assignment.py +0 -1
  27. vtlengine/Operators/CastOperator.py +44 -44
  28. vtlengine/Operators/Clause.py +16 -10
  29. vtlengine/Operators/Comparison.py +20 -12
  30. vtlengine/Operators/Conditional.py +47 -15
  31. vtlengine/Operators/General.py +9 -4
  32. vtlengine/Operators/HROperators.py +4 -14
  33. vtlengine/Operators/Join.py +15 -14
  34. vtlengine/Operators/Numeric.py +32 -26
  35. vtlengine/Operators/RoleSetter.py +6 -2
  36. vtlengine/Operators/Set.py +12 -8
  37. vtlengine/Operators/String.py +9 -9
  38. vtlengine/Operators/Time.py +145 -124
  39. vtlengine/Operators/Validation.py +10 -4
  40. vtlengine/Operators/__init__.py +56 -69
  41. vtlengine/Utils/__init__.py +55 -1
  42. vtlengine/__extras_check.py +17 -0
  43. vtlengine/__init__.py +2 -2
  44. vtlengine/files/output/__init__.py +2 -1
  45. vtlengine/files/output/_time_period_representation.py +2 -1
  46. vtlengine/files/parser/__init__.py +52 -46
  47. vtlengine/files/parser/_time_checking.py +4 -4
  48. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/METADATA +21 -17
  49. vtlengine-1.1.dist-info/RECORD +61 -0
  50. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/WHEEL +1 -1
  51. vtlengine/DataTypes/NumericTypesHandling.py +0 -38
  52. vtlengine-1.0.3rc3.dist-info/RECORD +0 -58
  53. {vtlengine-1.0.3rc3.dist-info → vtlengine-1.1.dist-info}/LICENSE.md +0 -0
vtlengine/API/_InternalApi.py

@@ -1,17 +1,33 @@
 import json
 import os
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 
+import jsonschema
 import pandas as pd
-from s3fs import S3FileSystem  # type: ignore[import-untyped]
+from pysdmx.model.dataflow import Component as SDMXComponent
+from pysdmx.model.dataflow import DataStructureDefinition, Schema
+from pysdmx.model.dataflow import Role as SDMX_Role
+from pysdmx.model.vtl import (
+    Ruleset,
+    RulesetScheme,
+    Transformation,
+    TransformationScheme,
+    UserDefinedOperator,
+    UserDefinedOperatorScheme,
+)
 
-from vtlengine.AST import PersistentAssignment, Start
+from vtlengine import AST as AST
+from vtlengine.__extras_check import __check_s3_extra
+from vtlengine.AST import Assignment, DPRuleset, HRuleset, Operator, PersistentAssignment, Start
+from vtlengine.AST.ASTString import ASTString
 from vtlengine.DataTypes import SCALAR_TYPES
-from vtlengine.Exceptions import check_key
+from vtlengine.Exceptions import InputValidationException, check_key
 from vtlengine.files.parser import _fill_dataset_empty_data, _validate_pandas
 from vtlengine.Model import (
-    Component,
+    Component as VTL_Component,
+)
+from vtlengine.Model import (
     Dataset,
     ExternalRoutine,
     Role,
@@ -19,15 +35,13 @@ from vtlengine.Model import (
     Scalar,
     ValueDomain,
 )
+from vtlengine.Utils import VTL_DTYPES_MAPPING, VTL_ROLE_MAPPING
 
 base_path = Path(__file__).parent
-filepath_VTL = base_path / "data" / "vtl"
-filepath_ValueDomains = base_path / "data" / "ValueDomain"
-filepath_sql = base_path / "data" / "sql"
-filepath_json = base_path / "data" / "DataStructure" / "input"
-filepath_csv = base_path / "data" / "DataSet" / "input"
-filepath_out_json = base_path / "data" / "DataStructure" / "output"
-filepath_out_csv = base_path / "data" / "DataSet" / "output"
+schema_path = base_path / "data" / "schema"
+sdmx_csv_path = base_path / "data" / "sdmx_csv"
+with open(schema_path / "json_schema_2.1.json", "r") as file:
+    schema = json.load(file)
 
 
 def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
@@ -41,22 +55,60 @@ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
             dataset_name = dataset_json["name"]
             components = {}
 
-            for component in dataset_json["DataStructure"]:
-                check_key("data_type", SCALAR_TYPES.keys(), component["type"])
-                check_key("role", Role_keys, component["role"])
-                components[component["name"]] = Component(
-                    name=component["name"],
-                    data_type=SCALAR_TYPES[component["type"]],
-                    role=Role(component["role"]),
-                    nullable=component["nullable"],
-                )
+            if "structure" in dataset_json:
+                structure_name = dataset_json["structure"]
+                structure_json = None
+                for s in structures["structures"]:
+                    if s["name"] == structure_name:
+                        structure_json = s
+                if structure_json is None:
+                    raise InputValidationException(code="0-3-1-1", message="Structure not found.")
+                try:
+                    jsonschema.validate(instance=structure_json, schema=schema)
+                except jsonschema.exceptions.ValidationError as e:
+                    raise InputValidationException(code="0-3-1-1", message=e.message)
+
+                for component in structure_json["components"]:
+                    check_key("data_type", SCALAR_TYPES.keys(), component["data_type"])
+                    if component["role"] == "ViralAttribute":
+                        component["role"] = "Attribute"
+
+                    check_key("role", Role_keys, component["role"])
+
+                    if "nullable" not in component:
+                        if Role(component["role"]) == Role.IDENTIFIER:
+                            component["nullable"] = False
+                        elif Role(component["role"]) in (Role.MEASURE, Role.ATTRIBUTE):
+                            component["nullable"] = True
+                        else:
+                            component["nullable"] = False
+
+                    components[component["name"]] = VTL_Component(
+                        name=component["name"],
+                        data_type=SCALAR_TYPES[component["data_type"]],
+                        role=Role(component["role"]),
+                        nullable=component["nullable"],
+                    )
+
+            if "DataStructure" in dataset_json:
+                for component in dataset_json["DataStructure"]:
+                    check_key("data_type", SCALAR_TYPES.keys(), component["type"])
+                    check_key("role", Role_keys, component["role"])
+                    components[component["name"]] = VTL_Component(
+                        name=component["name"],
+                        data_type=SCALAR_TYPES[component["type"]],
+                        role=Role(component["role"]),
+                        nullable=component["nullable"],
+                    )
 
             datasets[dataset_name] = Dataset(name=dataset_name, components=components, data=None)
     if "scalars" in structures:
         for scalar_json in structures["scalars"]:
             scalar_name = scalar_json["name"]
             scalar = Scalar(
-                name=scalar_name, data_type=SCALAR_TYPES[scalar_json["type"]], value=None
+                name=scalar_name,
+                data_type=SCALAR_TYPES[scalar_json["type"]],
+                value=None,
             )
             datasets[scalar_name] = scalar  # type: ignore[assignment]
     return datasets
@@ -70,38 +122,16 @@ def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
         raise Exception("Invalid datapoint. Input must be a Path or an S3 URI")
     if isinstance(datapoint, str):
         if "s3://" in datapoint:
-            # Handling S3 URI
-            s3fs_obj = S3FileSystem()
-
-            # Check if the S3 URI is valid
-            if not s3fs_obj.exists(datapoint):
-                raise Exception(
-                    f"Invalid datapoint. S3 URI does not exist or it is not accessible: {datapoint}"
-                )
-
-            # Check if the S3 URI is a directory
-            if s3fs_obj.isdir(datapoint):
-                datapoints: Dict[str, Any] = {}
-                for f in s3fs_obj.ls(datapoint):
-                    if f.endswith(".csv"):
-                        dataset_name = f.split("/")[-1].removesuffix(".csv")
-                        dict_data = {dataset_name: f"s3://{f}"}
-                        datapoints = {**datapoints, **dict_data}
-                return datapoints
-
-            # Check if the S3 URI is a csv file
-            if s3fs_obj.isfile(datapoint) and not datapoint.endswith(".csv"):
-                raise Exception(f"Invalid datapoint. S3 URI must refer to a csv file: {datapoint}")
+            __check_s3_extra()
             dataset_name = datapoint.split("/")[-1].removesuffix(".csv")
             dict_data = {dataset_name: datapoint}
             return dict_data
-
     try:
         datapoint = Path(datapoint)
     except Exception:
         raise Exception("Invalid datapoint. Input must refer to a Path or an S3 URI")
     if datapoint.is_dir():
-        datapoints = {}
+        datapoints: Dict[str, Any] = {}
         for f in datapoint.iterdir():
             if f.suffix != ".csv":
                 continue
@@ -115,7 +145,7 @@ def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
 
 
 def _load_datapoints_path(
-    datapoints: Union[Path, str, List[Union[str, Path]]]
+    datapoints: Union[Path, str, List[Union[str, Path]]],
 ) -> Dict[str, Dataset]:
     """
     Returns a dict with the data given from a Path.
@@ -156,7 +186,7 @@ def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> D
 
 
 def load_datasets(
-    data_structure: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]]
+    data_structure: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
 ) -> Dict[str, Dataset]:
     """
     Loads multiple datasets.
@@ -365,25 +395,222 @@ def _check_output_folder(output_folder: Union[str, Path]) -> None:
     """
     if isinstance(output_folder, str):
         if "s3://" in output_folder:
-            s3fs_obj = S3FileSystem()
-            # Check if the S3 URI is valid
-            if not s3fs_obj.exists(output_folder):
-                try:
-                    s3fs_obj.mkdir(output_folder)
-                except Exception:
-                    raise Exception(
-                        f"Invalid output folder. S3 URI is invalid or "
-                        f"it is not accessible: {output_folder}"
-                    )
+            __check_s3_extra()
+            if not output_folder.endswith("/"):
+                raise ValueError("Output folder must be a Path or S3 URI to a directory")
             return
     try:
         output_folder = Path(output_folder)
     except Exception:
-        raise Exception("Output folder must be a Path or S3 URI to a directory")
+        raise ValueError("Output folder must be a Path or S3 URI to a directory")
 
     if not isinstance(output_folder, Path):
-        raise Exception("Output folder must be a Path or S3 URI to a directory")
+        raise ValueError("Output folder must be a Path or S3 URI to a directory")
     if not output_folder.exists():
         if output_folder.suffix != "":
-            raise Exception("Output folder must be a Path or S3 URI to a directory")
+            raise ValueError("Output folder must be a Path or S3 URI to a directory")
         os.mkdir(output_folder)
+
+
+def to_vtl_json(dsd: Union[DataStructureDefinition, Schema], dataset_name: str) -> Dict[str, Any]:
+    """
+    Converts a pysdmx `DataStructureDefinition` or `Schema` into a VTL-compatible JSON
+    representation.
+
+    This function extracts and transforms the components (dimensions, measures, and attributes)
+    from the given SDMX data structure and maps them into a dictionary format that conforms
+    to the expected VTL data structure json schema.
+
+    Args:
+        dsd: An instance of `DataStructureDefinition` or `Schema` from the `pysdmx` model.
+        dataset_name: The name of the resulting VTL dataset.
+
+    Returns:
+        A dictionary representing the dataset in VTL format, with keys for dataset name and its
+        components, including their name, role, data type, and nullability.
+    """
+    components = []
+    NAME = "name"
+    ROLE = "role"
+    TYPE = "type"
+    NULLABLE = "nullable"
+
+    _components: List[SDMXComponent] = []
+    _components.extend(dsd.components.dimensions)
+    _components.extend(dsd.components.measures)
+    _components.extend(dsd.components.attributes)
+
+    for c in _components:
+        _type = VTL_DTYPES_MAPPING[c.dtype]
+        _nullability = c.role != SDMX_Role.DIMENSION
+        _role = VTL_ROLE_MAPPING[c.role]
+
+        component = {
+            NAME: c.id,
+            ROLE: _role,
+            TYPE: _type,
+            NULLABLE: _nullability,
+        }
+
+        components.append(component)
+
+    result = {"datasets": [{"name": dataset_name, "DataStructure": components}]}
+
+    return result
+
+
+def __generate_transformation(
+    child: Union[Assignment, PersistentAssignment], is_persistent: bool, count: int
+) -> Transformation:
+    expression = ASTString().render(ast=child.right)
+    result = child.left.value  # type: ignore[attr-defined]
+    return Transformation(
+        id=f"T{count}",
+        expression=expression,
+        is_persistent=is_persistent,
+        result=result,
+        name=f"Transformation {result}",
+    )
+
+
+def __generate_udo(child: Operator, count: int) -> UserDefinedOperator:
+    operator_definition = ASTString().render(ast=child)
+    return UserDefinedOperator(
+        id=f"UDO{count}",
+        operator_definition=operator_definition,
+        name=f"UserDefinedOperator {child.op}",
+    )
+
+
+def __generate_ruleset(child: Union[DPRuleset, HRuleset], count: int) -> Ruleset:
+    ruleset_definition = ASTString().render(ast=child)
+    ruleset_type: Literal["datapoint", "hierarchical"] = (
+        "datapoint" if isinstance(child, DPRuleset) else "hierarchical"
+    )
+    return Ruleset(
+        id=f"R{count}",
+        ruleset_definition=ruleset_definition,
+        ruleset_type=ruleset_type,
+        name=f"{ruleset_type.capitalize()} ruleset {child.name}",
+    )
+
+
+def ast_to_sdmx(ast: AST.Start, agency_id: str, id: str, version: str) -> TransformationScheme:
+    """
+    Converts a vtl AST into an SDMX compatible `TransformationScheme` object, following
+    the pysdmx model.
+
+    This function iterates over the child nodes of the given AST and categorizes each into one of
+    the following types:
+    - `PersistentAssignment`: Represents a persistent transformation. These are added to the
+      transformation list with a persistence flag.
+    - `Assignment`: Represents a temporary (non-persistent) transformation. These are added to the
+      transformation list without the persistence flag.
+    - `DPRuleset` or `HRuleset`: Represent validation rule sets.
+      These are collected and wrapped into a `RulesetScheme` object.
+    - `Operator`: Defines user-defined operators. These are collected
+      into a `UserDefinedOperatorScheme` object.
+
+    After parsing all AST elements:
+    - If any rulesets were found, a `RulesetScheme` is created and added to the references.
+    - If any user-defined operators were found, a `UserDefinedOperatorScheme` is created and added
+      to the references.
+    - A `TransformationScheme` object is constructed with all collected transformations and any
+      additional references.
+
+    Args:
+        ast: The root node of the vtl ast representing the set of
+            vtl expressions.
+        agency_id: The identifier of the agency defining the SDMX structure as a string.
+        id: The identifier of the transformation scheme as a string.
+        version: The version of the transformation scheme given as a string.
+
+    Returns:
+        TransformationScheme: A fully constructed transformation scheme that includes
+        transformations, and optionally rule sets and user-defined operator schemes,
+        suitable for SDMX.
+
+    """
+    list_transformation = []
+    list_udos = []
+    list_rulesets = []
+    count_transformation = 0
+    count_udo = 0
+    count_ruleset = 0
+
+    for child in ast.children:
+        if isinstance(child, PersistentAssignment):
+            count_transformation += 1
+            list_transformation.append(
+                __generate_transformation(
+                    child=child, is_persistent=True, count=count_transformation
+                )
+            )
+        elif isinstance(child, Assignment):
+            count_transformation += 1
+            list_transformation.append(
+                __generate_transformation(
+                    child=child, is_persistent=False, count=count_transformation
+                )
+            )
+        elif isinstance(child, (DPRuleset, HRuleset)):
+            count_ruleset += 1
+            list_rulesets.append(__generate_ruleset(child=child, count=count_ruleset))
+        elif isinstance(child, Operator):
+            count_udo += 1
+            list_udos.append(__generate_udo(child=child, count=count_udo))
+
+    references: Any = {}
+    if list_rulesets:
+        references["ruleset_schemes"] = [
+            RulesetScheme(
+                items=list_rulesets,
+                agency=agency_id,
+                id="RS1",
+                vtl_version="2.1",
+                version=version,
+                name=f"RulesetScheme {id}-RS",
+            )
+        ]
+    if list_udos:
+        references["user_defined_operator_schemes"] = [
+            UserDefinedOperatorScheme(
+                items=list_udos,
+                agency=agency_id,
+                id="UDS1",
+                vtl_version="2.1",
+                version=version,
+                name=f"UserDefinedOperatorScheme {id}-UDS",
+            )
+        ]
+
+    transformation_scheme = TransformationScheme(
+        items=list_transformation,
+        agency=agency_id,
+        id="TS1",
+        vtl_version="2.1",
+        version=version,
+        name=f"TransformationScheme {id}",
+        **references,
+    )
+
+    return transformation_scheme
+
+
+def _check_script(script: Union[str, TransformationScheme, Path]) -> str:
+    """
+    Check if the TransformationScheme object is valid to generate a vtl script.
+    """
+    if not isinstance(script, (str, TransformationScheme, Path)):
+        raise Exception(
+            "Invalid script format. Input must be a string, TransformationScheme or Path object"
+        )
+    if isinstance(script, TransformationScheme):
+        from pysdmx.toolkit.vtl.generate_vtl_script import (
+            generate_vtl_script,
+        )
+
+        vtl_script = generate_vtl_script(script, model_validation=True)
+        return vtl_script
+    else:
+        return str(script)
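
For readers reviewing the new structure handling in _load_dataset_from_structure (hunk @@ -41,22 +55,60 @@ above), here is a minimal sketch of the new-style input it now accepts alongside the legacy "DataStructure" form. The field names are read directly from the diff; the concrete type literals ("Integer", "Number", "String") and the minimal set of keys are assumptions, and the bundled json_schema_2.1.json may require additional fields.

# Minimal sketch (assumed shape, inferred from the diff; not taken from the package docs).
# "ViralAttribute" is remapped to "Attribute", and a missing "nullable" is defaulted by role.
from vtlengine.API._InternalApi import _load_dataset_from_structure

structures = {
    "datasets": [{"name": "DS_1", "structure": "STR_1"}],
    "structures": [
        {
            "name": "STR_1",
            "components": [
                {"name": "Id_1", "data_type": "Integer", "role": "Identifier"},
                {"name": "Me_1", "data_type": "Number", "role": "Measure"},
                {"name": "At_1", "data_type": "String", "role": "ViralAttribute"},
            ],
        }
    ],
}

datasets = _load_dataset_from_structure(structures)  # {"DS_1": Dataset(...)} with data=None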
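
And a sketch of the new SDMX bridge added at the end of the file. ast_to_sdmx is taken from the diff; create_ast is assumed to be the existing public helper in vtlengine.API that parses a VTL script into an AST.Start node, so verify the exact entry point against the 1.1 API before relying on it.

from vtlengine.API import create_ast  # assumed public helper (parses VTL into AST.Start)
from vtlengine.API._InternalApi import ast_to_sdmx

script = "DS_r <- DS_1 + DS_2;"
ast = create_ast(script)  # AST.Start with a single Assignment child

# Each assignment becomes a Transformation (T1, T2, ...); rulesets and user-defined
# operators, if present, are wrapped into RulesetScheme / UserDefinedOperatorScheme.
scheme = ast_to_sdmx(ast=ast, agency_id="MD", id="EXAMPLE", version="1.0")
print(scheme.id, [t.result for t in scheme.items])  # TS1 ['DS_r']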