vtlengine 1.1rc2.tar.gz → 1.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)
  1. vtlengine-1.2.0/PKG-INFO +92 -0
  2. vtlengine-1.2.0/README.md +54 -0
  3. {vtlengine-1.1rc2 → vtlengine-1.2.0}/pyproject.toml +20 -12
  4. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/API/_InternalApi.py +288 -29
  5. vtlengine-1.2.0/src/vtlengine/API/__init__.py +523 -0
  6. vtlengine-1.2.0/src/vtlengine/AST/ASTComment.py +56 -0
  7. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/ASTConstructor.py +71 -18
  8. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/ASTConstructorModules/Expr.py +197 -75
  9. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/ASTConstructorModules/ExprComponents.py +81 -38
  10. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/ASTConstructorModules/Terminals.py +76 -31
  11. vtlengine-1.2.0/src/vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
  12. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/ASTEncoders.py +4 -0
  13. vtlengine-1.2.0/src/vtlengine/AST/ASTString.py +622 -0
  14. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/ASTTemplate.py +28 -2
  15. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/DAG/__init__.py +44 -6
  16. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/DAG/_words.py +1 -0
  17. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/Grammar/Vtl.g4 +7 -7
  18. vtlengine-1.2.0/src/vtlengine/AST/Grammar/lexer.py +20785 -0
  19. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/Grammar/parser.py +17996 -3199
  20. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/__init__.py +127 -14
  21. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Exceptions/messages.py +14 -2
  22. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Interpreter/__init__.py +90 -11
  23. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Model/__init__.py +9 -4
  24. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Aggregation.py +13 -6
  25. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Analytic.py +19 -13
  26. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/CastOperator.py +5 -2
  27. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Clause.py +26 -18
  28. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Comparison.py +3 -1
  29. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Conditional.py +40 -18
  30. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/General.py +3 -1
  31. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/HROperators.py +3 -1
  32. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Join.py +4 -2
  33. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Time.py +22 -15
  34. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Validation.py +5 -2
  35. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/__init__.py +15 -8
  36. vtlengine-1.2.0/src/vtlengine/Utils/__Virtual_Assets.py +34 -0
  37. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Utils/__init__.py +49 -0
  38. vtlengine-1.2.0/src/vtlengine/__init__.py +5 -0
  39. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/files/parser/__init__.py +16 -26
  40. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/files/parser/_rfc_dialect.py +1 -1
  41. vtlengine-1.1rc2/PKG-INFO +0 -248
  42. vtlengine-1.1rc2/README.md +0 -213
  43. vtlengine-1.1rc2/src/vtlengine/API/__init__.py +0 -316
  44. vtlengine-1.1rc2/src/vtlengine/AST/Grammar/lexer.py +0 -2138
  45. vtlengine-1.1rc2/src/vtlengine/__init__.py +0 -3
  46. {vtlengine-1.1rc2 → vtlengine-1.2.0}/LICENSE.md +0 -0
  47. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/API/data/schema/json_schema_2.1.json +0 -0
  48. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/ASTDataExchange.py +0 -0
  49. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/ASTVisitor.py +0 -0
  50. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/Grammar/VtlTokens.g4 +0 -0
  51. {vtlengine-1.1rc2/src/vtlengine/AST/ASTConstructorModules → vtlengine-1.2.0/src/vtlengine/AST/Grammar}/__init__.py +0 -0
  52. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/Grammar/tokens.py +0 -0
  53. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/AST/VtlVisitor.py +0 -0
  54. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/DataTypes/TimeHandling.py +0 -0
  55. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/DataTypes/__init__.py +0 -0
  56. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Exceptions/__init__.py +0 -0
  57. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Assignment.py +0 -0
  58. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Boolean.py +0 -0
  59. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Numeric.py +0 -0
  60. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/RoleSetter.py +0 -0
  61. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/Set.py +0 -0
  62. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/Operators/String.py +0 -0
  63. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/__extras_check.py +0 -0
  64. {vtlengine-1.1rc2/src/vtlengine/AST/Grammar → vtlengine-1.2.0/src/vtlengine/files}/__init__.py +0 -0
  65. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/files/output/__init__.py +0 -0
  66. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/files/output/_time_period_representation.py +0 -0
  67. {vtlengine-1.1rc2 → vtlengine-1.2.0}/src/vtlengine/files/parser/_time_checking.py +0 -0
  68. /vtlengine-1.1rc2/src/vtlengine/files/__init__.py → /vtlengine-1.2.0/src/vtlengine/py.typed +0 -0
@@ -0,0 +1,92 @@
+ Metadata-Version: 2.3
+ Name: vtlengine
+ Version: 1.2.0
+ Summary: Run and Validate VTL Scripts
+ License: AGPL-3.0
+ Keywords: vtl,sdmx,vtlengine,Validation and Transformation Language
+ Author: MeaningfulData
+ Author-email: info@meaningfuldata.eu
+ Maintainer: Francisco Javier Hernandez del Caño
+ Maintainer-email: javier.hernandez@meaningfuldata.eu
+ Requires-Python: >=3.9
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Information Technology
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Typing :: Typed
+ Provides-Extra: all
+ Provides-Extra: s3
+ Requires-Dist: antlr4-python3-runtime (>=4.13.2,<4.14)
+ Requires-Dist: duckdb (>=1.1,<1.2)
+ Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "all"
+ Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "s3"
+ Requires-Dist: jsonschema (>=3.2.0,<5.0)
+ Requires-Dist: networkx (>=2.8,<3.0)
+ Requires-Dist: numpy (>=1.23.2,<2) ; python_version < "3.13"
+ Requires-Dist: numpy (>=2.1.0) ; python_version >= "3.13"
+ Requires-Dist: pandas (>=2.1.4,<3.0)
+ Requires-Dist: pysdmx[xml] (>=1.4.0rc1,<2.0)
+ Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "all"
+ Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "s3"
+ Requires-Dist: sqlglot (>=22.2.0,<23.0)
+ Project-URL: Authors, https://github.com/Meaningful-Data/vtlengine/graphs/contributors
+ Project-URL: Documentation, https://docs.vtlengine.meaningfuldata.eu
+ Project-URL: IssueTracker, https://github.com/Meaningful-Data/vtlengine/issues
+ Project-URL: MeaningfulData, https://www.meaningfuldata.eu/
+ Project-URL: Repository, https://github.com/Meaningful-Data/vtlengine
+ Description-Content-Type: text/markdown
+
+ # VTL Engine
+
+ | | |
+ |--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+ | Testing | [![Testing](https://github.com/Meaningful-Data/vtlengine/actions/workflows/testing.yml/badge.svg)](https://github.com/Meaningful-Data/vtlengine/actions/workflows/testing.yml) |
+ | Package | [![PyPI Latest Release](https://img.shields.io/pypi/v/vtlengine.svg)](https://pypi.org/project/vtlengine/) |
+ | License | [![License - AGPL 3.0](https://img.shields.io/pypi/l/vtlengine.svg)](https://github.com/Meaningful-Data/vtlengine/blob/main/LICENSE.md) |
+ | Mentioned in | [![Mentioned in Awesome Official Statistics ](https://awesome.re/mentioned-badge.svg)](https://github.com/SNStatComp/awesome-official-statistics-software) |
+
+ ## Introduction
+
+ The VTL Engine is a Python library that allows you to validate, format and execute VTL scripts.
+
+ It is a Python-based library around
+ the [VTL Language 2.1](https://sdmx-twg.github.io/vtl/2.1/html/index.html).
+
+ ## Useful Links
+
+ - [MeaningfulData: who we are](https://www.meaningfuldata.eu)
+ - [Documentation](https://docs.vtlengine.meaningfuldata.eu)
+ - [Source Code](https://github.com/Meaningful-Data/vtlengine)
+ - [Bug Tracker](https://github.com/Meaningful-Data/vtlengine/issues?q=is%3Aopen+is%3Aissue+label%3Abug)
+ - [New features Tracker](https://github.com/Meaningful-Data/vtlengine/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement)
+
+ ## Installation
+
+ ### Requirements
+
+ The VTL Engine requires Python 3.9 or higher.
+
+ ### Install with pip
+
+ To install the VTL Engine on any Operating System, you can use pip:
+
+ ```bash
+
+ pip install vtlengine
+
+ ```
+
+ *Note: it is recommended to install the VTL Engine in a virtual environment.*
+
+ ### S3 extra
+
+ If you want to use the S3 functionality, you can install the VTL Engine with the `s3` extra:
+
+ ```bash
+ pip install vtlengine[s3]
+ ```
+
+ ## Documentation
+
+ The documentation for the VTL Engine is available
+ at [docs.vtlengine.meaningfuldata.eu](https://docs.vtlengine.meaningfuldata.eu).
@@ -0,0 +1,54 @@
+ # VTL Engine
+
+ | | |
+ |--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+ | Testing | [![Testing](https://github.com/Meaningful-Data/vtlengine/actions/workflows/testing.yml/badge.svg)](https://github.com/Meaningful-Data/vtlengine/actions/workflows/testing.yml) |
+ | Package | [![PyPI Latest Release](https://img.shields.io/pypi/v/vtlengine.svg)](https://pypi.org/project/vtlengine/) |
+ | License | [![License - AGPL 3.0](https://img.shields.io/pypi/l/vtlengine.svg)](https://github.com/Meaningful-Data/vtlengine/blob/main/LICENSE.md) |
+ | Mentioned in | [![Mentioned in Awesome Official Statistics ](https://awesome.re/mentioned-badge.svg)](https://github.com/SNStatComp/awesome-official-statistics-software) |
+
+ ## Introduction
+
+ The VTL Engine is a Python library that allows you to validate, format and execute VTL scripts.
+
+ It is a Python-based library around
+ the [VTL Language 2.1](https://sdmx-twg.github.io/vtl/2.1/html/index.html).
+
+ ## Useful Links
+
+ - [MeaningfulData: who we are](https://www.meaningfuldata.eu)
+ - [Documentation](https://docs.vtlengine.meaningfuldata.eu)
+ - [Source Code](https://github.com/Meaningful-Data/vtlengine)
+ - [Bug Tracker](https://github.com/Meaningful-Data/vtlengine/issues?q=is%3Aopen+is%3Aissue+label%3Abug)
+ - [New features Tracker](https://github.com/Meaningful-Data/vtlengine/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement)
+
+ ## Installation
+
+ ### Requirements
+
+ The VTL Engine requires Python 3.9 or higher.
+
+ ### Install with pip
+
+ To install the VTL Engine on any Operating System, you can use pip:
+
+ ```bash
+
+ pip install vtlengine
+
+ ```
+
+ *Note: it is recommended to install the VTL Engine in a virtual environment.*
+
+ ### S3 extra
+
+ If you want to use the S3 functionality, you can install the VTL Engine with the `s3` extra:
+
+ ```bash
+ pip install vtlengine[s3]
+ ```
+
+ ## Documentation
+
+ The documentation for the VTL Engine is available
+ at [docs.vtlengine.meaningfuldata.eu](https://docs.vtlengine.meaningfuldata.eu).
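
For readers skimming this diff: the README above describes validating and executing VTL scripts but stops short of an API example. The sketch below shows a typical invocation, assuming the `run` entry point described in the project documentation (docs.vtlengine.meaningfuldata.eu); the dataset structure and values are illustrative, not taken from this release.

```python
# Minimal sketch, assuming the `run` entry point documented at
# docs.vtlengine.meaningfuldata.eu; structure and data are illustrative.
import pandas as pd

from vtlengine import run

# A VTL 2.1 script: derive a new measure from an existing one.
script = "DS_r <- DS_1 [calc Me_2 := Me_1 * 2];"

# Data structure in the JSON shape this diff's loaders expect
# (name / type / role / nullable per component).
data_structures = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {"name": "Id_1", "type": "Integer", "role": "Identifier", "nullable": False},
                {"name": "Me_1", "type": "Number", "role": "Measure", "nullable": True},
            ],
        }
    ]
}

datapoints = {"DS_1": pd.DataFrame({"Id_1": [1, 2], "Me_1": [10.0, 20.0]})}

# `run` executes the script against the structures and data and returns
# the computed datasets keyed by result name.
result = run(script=script, data_structures=data_structures, datapoints=datapoints)
print(result["DS_r"].data)
```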
@@ -1,10 +1,10 @@
  [project]
  name = "vtlengine"
- version = "1.1rc2"
+ version = "1.2.0"
  description = "Run and Validate VTL Scripts"
  license = "AGPL-3.0"
  readme = "README.md"
- requires-python = ">=3.9,<4"
+ requires-python = ">=3.9"
  authors = [
      {name = "MeaningfulData", email = "info@meaningfuldata.eu"},
  ]
@@ -25,13 +25,15 @@ keywords = ['vtl', 'sdmx', 'vtlengine', 'Validation and Transformation Language'
  dependencies = [
      # PyPi dependencies
      "duckdb>=1.1,<1.2",
-
+     "pysdmx[xml]>=1.4.0rc1,<2.0",
      # APT-supported dependencies
      "jsonschema>=3.2.0,<5.0",
      "sqlglot>=22.2.0,<23.0",
-     "antlr4-python3-runtime>=4.9.2,<4.10",
-     "pandas>=2.1.4,<2.2",
+     "antlr4-python3-runtime>=4.13.2,<4.14",
+     "pandas>=2.1.4,<3.0",
      "networkx>=2.8,<3.0",
+     "numpy>=1.23.2,<2 ; python_version < '3.13'",
+     "numpy>=2.1.0; python_version >= '3.13'",
  ]

  [project.optional-dependencies]
@@ -45,20 +47,26 @@ MeaningfulData = 'https://www.meaningfuldata.eu/'
  IssueTracker = 'https://github.com/Meaningful-Data/vtlengine/issues'
  Authors = 'https://github.com/Meaningful-Data/vtlengine/graphs/contributors'

+ [tool.poetry.dependencies]
+ python = ">=3.9,<4.0"
+
  [tool.poetry.group.dev.dependencies]
- pytest = "^8.3"
- pytest-cov = "^6.0.0"
+ pytest = "^8.4"
+ pytest-cov = "^6.2.1"
+ pytest-xdist = "^3.8.0"
  line-profiler-pycharm = "^1.2.0"
- mypy = "1.14.1"
- pandas-stubs = "^2.1.4.231227"
- ruff = "^0.9.4"
- types-jsonschema = "4.23.0.20241208"
+ mypy = "1.16.1"
+ pandas-stubs = "2.2.2.240807"
+ ruff = "^0.12.3"
+ types-jsonschema = "4.24.0.20250708"

  [tool.poetry.group.docs.dependencies]
  sphinx = "^7.4.7"
  sphinx-rtd-theme = "^3.0.2"
  toml = "^0.10.2"

+
+
  [tool.ruff]
  line-length = 100
  lint.mccabe.max-complexity = 20
@@ -68,7 +76,7 @@ lint.select = [
  # TODO: check S608 (duckdb querys)
  lint.ignore = ["B023", "B028", "B904", "C403", "D100", "D101", "D102", "D103", "D104", "D105",
                 "D107", "D200", "D201", "D202", "D203", "D205", "D209", "D212", "D213", "D301",
-                "D400", "D401", "D404", "D411", "D413", "D415", "D419", "E203", "S320", "S608"]
+                "D400", "D401", "D404", "D411", "D413", "D415", "D419", "E203", "S608"]
  lint.exclude = ["*/Grammar/*"]

  [tool.ruff.lint.per-file-ignores]
@@ -1,18 +1,37 @@
  import json
  import os
  from pathlib import Path
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any, Dict, List, Literal, Optional, Tuple, Union

  import jsonschema
  import pandas as pd
+ from pysdmx.model.dataflow import Component as SDMXComponent
+ from pysdmx.model.dataflow import DataStructureDefinition, Schema
+ from pysdmx.model.dataflow import Role as SDMX_Role
+ from pysdmx.model.vtl import (
+     Ruleset,
+     RulesetScheme,
+     Transformation,
+     TransformationScheme,
+     UserDefinedOperator,
+     UserDefinedOperatorScheme,
+ )

+ from vtlengine import AST as AST
  from vtlengine.__extras_check import __check_s3_extra
- from vtlengine.AST import PersistentAssignment, Start
+ from vtlengine.AST import Assignment, DPRuleset, HRuleset, Operator, PersistentAssignment, Start
+ from vtlengine.AST.ASTString import ASTString
  from vtlengine.DataTypes import SCALAR_TYPES
- from vtlengine.Exceptions import InputValidationException, check_key
+ from vtlengine.Exceptions import (
+     InputValidationException,
+     SemanticError,
+     check_key,
+ )
  from vtlengine.files.parser import _fill_dataset_empty_data, _validate_pandas
  from vtlengine.Model import (
-     Component,
+     Component as VTL_Component,
+ )
+ from vtlengine.Model import (
      Dataset,
      ExternalRoutine,
      Role,
@@ -20,18 +39,23 @@ from vtlengine.Model import (
      Scalar,
      ValueDomain,
  )
+ from vtlengine.Utils import VTL_DTYPES_MAPPING, VTL_ROLE_MAPPING

  base_path = Path(__file__).parent
  schema_path = base_path / "data" / "schema"
+ sdmx_csv_path = base_path / "data" / "sdmx_csv"
  with open(schema_path / "json_schema_2.1.json", "r") as file:
      schema = json.load(file)


- def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
+ def _load_dataset_from_structure(
+     structures: Dict[str, Any],
+ ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
      """
      Loads a dataset with the structure given.
      """
      datasets = {}
+     scalars = {}

      if "datasets" in structures:
          for dataset_json in structures["datasets"]:
@@ -66,7 +90,7 @@ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
                  else:
                      component["nullable"] = False

-                 components[component["name"]] = Component(
+                 components[component["name"]] = VTL_Component(
                      name=component["name"],
                      data_type=SCALAR_TYPES[component["data_type"]],
                      role=Role(component["role"]),
@@ -77,7 +101,7 @@
              for component in dataset_json["DataStructure"]:
                  check_key("data_type", SCALAR_TYPES.keys(), component["type"])
                  check_key("role", Role_keys, component["role"])
-                 components[component["name"]] = Component(
+                 components[component["name"]] = VTL_Component(
                      name=component["name"],
                      data_type=SCALAR_TYPES[component["type"]],
                      role=Role(component["role"]),
@@ -93,8 +117,8 @@
              data_type=SCALAR_TYPES[scalar_json["type"]],
              value=None,
          )
-         datasets[scalar_name] = scalar  # type: ignore[assignment]
-     return datasets
+         scalars[scalar_name] = scalar
+     return datasets, scalars


  def _load_single_datapoint(datapoint: Union[str, Path]) -> Dict[str, Any]:
@@ -142,7 +166,9 @@
      return _load_single_datapoint(datapoints)


- def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> Dict[str, Dataset]:
+ def _load_datastructure_single(
+     data_structure: Union[Dict[str, Any], Path],
+ ) -> Tuple[Dict[str, Dataset], Dict[str, Scalar]]:
      """
      Loads a single data structure.
      """
@@ -153,13 +179,15 @@ def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> D
      if not data_structure.exists():
          raise Exception("Invalid datastructure. Input does not exist")
      if data_structure.is_dir():
-         datasets: Dict[str, Any] = {}
+         datasets: Dict[str, Dataset] = {}
+         scalars: Dict[str, Scalar] = {}
          for f in data_structure.iterdir():
              if f.suffix != ".json":
                  continue
-             dataset = _load_datastructure_single(f)
-             datasets = {**datasets, **dataset}
-         return datasets
+             ds, sc = _load_datastructure_single(f)
+             datasets = {**datasets, **ds}
+             scalars = {**scalars, **sc}
+         return datasets, scalars
      else:
          if data_structure.suffix != ".json":
              raise Exception("Invalid datastructure. Must have .json extension")
@@ -169,8 +197,8 @@


  def load_datasets(
-     data_structure: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
- ) -> Dict[str, Dataset]:
+     data_structure: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
+ ) -> Tuple[Dict[str, Dataset], Dict[str, Scalar]]:
      """
      Loads multiple datasets.

@@ -188,21 +216,42 @@
      if isinstance(data_structure, dict):
          return _load_datastructure_single(data_structure)
      if isinstance(data_structure, list):
-         ds_structures: Dict[str, Any] = {}
+         ds_structures: Dict[str, Dataset] = {}
+         scalar_structures: Dict[str, Scalar] = {}
          for x in data_structure:
-             result = _load_datastructure_single(x)
-             ds_structures = {**ds_structures, **result}  # Overwrite ds_structures dict.
-         return ds_structures
+             ds, sc = _load_datastructure_single(x)
+             ds_structures = {**ds_structures, **ds}  # Overwrite ds_structures dict.
+             scalar_structures = {**scalar_structures, **sc}  # Overwrite scalar_structures dict.
+         return ds_structures, scalar_structures
      return _load_datastructure_single(data_structure)


- def load_datasets_with_data(data_structures: Any, datapoints: Optional[Any] = None) -> Any:
+ def _handle_scalars_values(
+     scalars: Dict[str, Scalar],
+     scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+ ) -> None:
+     if scalar_values is None:
+         return
+     # Handling scalar values with the scalar dict
+     for name, value in scalar_values.items():
+         if name not in scalars:
+             raise Exception(f"Not found scalar {name} in datastructures")
+         # Casting value to scalar data type
+         scalars[name].value = scalars[name].data_type.cast(value)
+
+
+ def load_datasets_with_data(
+     data_structures: Any,
+     datapoints: Optional[Any] = None,
+     scalar_values: Optional[Dict[str, Optional[Union[int, str, bool, float]]]] = None,
+ ) -> Any:
      """
      Loads the dataset structures and fills them with the data contained in the datapoints.

      Args:
          data_structures: Dict, Path or a List of dicts or Paths.
          datapoints: Dict, Path or a List of Paths.
+         scalar_values: Dict with the scalar values.

      Returns:
          A dict with the structure and a pandas dataframe with the data.
@@ -210,17 +259,18 @@ load_datasets_with_data(data_structures: Any, datapoints: Optional[Any] = No
      Raises:
          Exception: If the Path is wrong or the file is invalid.
      """
-     datasets = load_datasets(data_structures)
+     datasets, scalars = load_datasets(data_structures)
      if datapoints is None:
          for dataset in datasets.values():
              if isinstance(dataset, Dataset):
                  _fill_dataset_empty_data(dataset)
-         return datasets, None
+         _handle_scalars_values(scalars, scalar_values)
+         return datasets, scalars, None
      if isinstance(datapoints, dict):
          # Handling dictionary of Pandas Dataframes
          for dataset_name, data in datapoints.items():
              if dataset_name not in datasets:
-                 raise Exception(f"Not found dataset {dataset_name}")
+                 raise Exception(f"Not found dataset {dataset_name} in datastructures.")
              datasets[dataset_name].data = _validate_pandas(
                  datasets[dataset_name].components, data, dataset_name
              )
@@ -229,14 +279,17 @@ def load_datasets_with_data(data_structures: Any, datapoints: Optional[Any] = No
              datasets[dataset_name].data = pd.DataFrame(
                  columns=list(datasets[dataset_name].components.keys())
              )
-         return datasets, None
+         _handle_scalars_values(scalars, scalar_values)
+         return datasets, scalars, None
      # Handling dictionary of paths
      dict_datapoints = _load_datapoints_path(datapoints)
      for dataset_name, _ in dict_datapoints.items():
          if dataset_name not in datasets:
-             raise Exception(f"Not found dataset {dataset_name}")
+             raise Exception(f"Not found dataset {dataset_name} in datastructures.")

-     return datasets, dict_datapoints
+     _handle_scalars_values(scalars, scalar_values)
+
+     return datasets, scalars, dict_datapoints


  def load_vtl(input: Union[str, Path]) -> str:
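
Reviewer note on the hunks above: `load_datasets` and `load_datasets_with_data` now return scalars alongside datasets, and the new `scalar_values` argument is cast through each scalar's data type. A hedged sketch of the new call shape follows; the `"scalars"` structure key and its field names are inferred from the scalar-loading fragment earlier in this diff, not verified against the shipped JSON schema, and these are internal (underscore-prefixed) helpers.

```python
# Hedged sketch of the new three-part return and scalar casting shown above.
# The "scalars" key and its field names are assumptions inferred from this
# diff; the helpers live in the internal module vtlengine.API._InternalApi.
from vtlengine.API._InternalApi import load_datasets_with_data

structures = {
    "datasets": [],
    "scalars": [{"name": "sc_rate", "type": "Number"}],  # assumed schema
}

# scalar_values entries are cast via scalars[name].data_type.cast(value),
# so the string "3.5" should come back as the number 3.5.
datasets, scalars, datapoints = load_datasets_with_data(
    structures, datapoints=None, scalar_values={"sc_rate": "3.5"}
)
print(scalars["sc_rate"].value)
```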
@@ -345,8 +398,8 @@


  def _return_only_persistent_datasets(
-     datasets: Dict[str, Dataset], ast: Start
- ) -> Dict[str, Dataset]:
+     datasets: Dict[str, Union[Dataset, Scalar]], ast: Start
+ ) -> Dict[str, Union[Dataset, Scalar]]:
      """
      Returns only the datasets with a persistent assignment.
      """
@@ -393,3 +446,209 @@
      if output_folder.suffix != "":
          raise ValueError("Output folder must be a Path or S3 URI to a directory")
      os.mkdir(output_folder)
+
+
+ def to_vtl_json(dsd: Union[DataStructureDefinition, Schema], dataset_name: str) -> Dict[str, Any]:
+     """
+     Converts a pysdmx `DataStructureDefinition` or `Schema` into a VTL-compatible JSON
+     representation.
+
+     This function extracts and transforms the components (dimensions, measures, and attributes)
+     from the given SDMX data structure and maps them into a dictionary format that conforms
+     to the expected VTL data structure json schema.
+
+     Args:
+         dsd: An instance of `DataStructureDefinition` or `Schema` from the `pysdmx` model.
+         dataset_name: The name of the resulting VTL dataset.
+
+     Returns:
+         A dictionary representing the dataset in VTL format, with keys for dataset name and its
+         components, including their name, role, data type, and nullability.
+     """
+     components = []
+     NAME = "name"
+     ROLE = "role"
+     TYPE = "type"
+     NULLABLE = "nullable"
+
+     _components: List[SDMXComponent] = []
+     _components.extend(dsd.components.dimensions)
+     _components.extend(dsd.components.measures)
+     _components.extend(dsd.components.attributes)
+
+     for c in _components:
+         _type = VTL_DTYPES_MAPPING[c.dtype]
+         _nullability = c.role != SDMX_Role.DIMENSION
+         _role = VTL_ROLE_MAPPING[c.role]
+
+         component = {
+             NAME: c.id,
+             ROLE: _role,
+             TYPE: _type,
+             NULLABLE: _nullability,
+         }
+
+         components.append(component)
+
+     result = {"datasets": [{"name": dataset_name, "DataStructure": components}]}
+
+     return result
+
+
+ def __generate_transformation(
+     child: Union[Assignment, PersistentAssignment], is_persistent: bool, count: int
+ ) -> Transformation:
+     expression = ASTString().render(ast=child.right)
+     result = child.left.value  # type: ignore[attr-defined]
+     return Transformation(
+         id=f"T{count}",
+         expression=expression,
+         is_persistent=is_persistent,
+         result=result,
+         name=f"Transformation {result}",
+     )
+
+
+ def __generate_udo(child: Operator, count: int) -> UserDefinedOperator:
+     operator_definition = ASTString().render(ast=child)
+     return UserDefinedOperator(
+         id=f"UDO{count}",
+         operator_definition=operator_definition,
+         name=f"UserDefinedOperator {child.op}",
+     )
+
+
+ def __generate_ruleset(child: Union[DPRuleset, HRuleset], count: int) -> Ruleset:
+     ruleset_definition = ASTString().render(ast=child)
+     ruleset_type: Literal["datapoint", "hierarchical"] = (
+         "datapoint" if isinstance(child, DPRuleset) else "hierarchical"
+     )
+     ruleset_scope: Literal["variable", "valuedomain"] = (
+         "variable" if child.signature_type == "variable" else "valuedomain"
+     )
+     return Ruleset(
+         id=f"R{count}",
+         ruleset_definition=ruleset_definition,
+         ruleset_type=ruleset_type,
+         ruleset_scope=ruleset_scope,
+         name=f"{ruleset_type.capitalize()} ruleset {child.name}",
+     )
+
+
+ def ast_to_sdmx(ast: AST.Start, agency_id: str, id: str, version: str) -> TransformationScheme:
+     """
+     Converts a vtl AST into an SDMX compatible `TransformationScheme` object, following
+     the pysdmx model.
+
+     This function iterates over the child nodes of the given AST and categorizes each into one of
+     the following types:
+     - `PersistentAssignment`: Represents a persistent transformation. These are added to the
+       transformation list with a persistence flag.
+     - `Assignment`: Represents a temporary (non-persistent) transformation. These are added to the
+       transformation list without the persistence flag
+     - `DPRuleset` or `HRuleset`: Represent validation rule sets.
+       These are collected and wrapped into a `RulesetScheme` object.
+     - `Operator`: Defines user-defined operators. These are collected
+       into a `UserDefinedOperatorScheme` object.
+
+     After parsing all AST elements:
+     - If any rulesets were found, a `RulesetScheme` is created and added to the references.
+     - If any user-defined operators were found, a `UserDefinedOperatorScheme` is created and added
+       to the references.
+     - A `TransformationScheme` object is constructed with all collected transformations and any
+       additional references.
+
+     Args:
+         ast: The root node of the vtl ast representing the set of
+             vtl expressions.
+         agency_id: The identifier of the agency defining the SDMX structure as a string.
+         id: The identifier of the transformation scheme as a string.
+         version: The version of the transformation scheme given as a string.
+
+     Returns:
+         TransformationScheme: A fully constructed transformation scheme that includes
+             transformations, and optionally rule sets and user-defined operator schemes,
+             suitable for SDMX.
+
+     """
+     list_transformation = []
+     list_udos = []
+     list_rulesets = []
+     count_transformation = 0
+     count_udo = 0
+     count_ruleset = 0
+
+     for child in ast.children:
+         if isinstance(child, PersistentAssignment):
+             count_transformation += 1
+             list_transformation.append(
+                 __generate_transformation(
+                     child=child, is_persistent=True, count=count_transformation
+                 )
+             )
+         elif isinstance(child, Assignment):
+             count_transformation += 1
+             list_transformation.append(
+                 __generate_transformation(
+                     child=child, is_persistent=False, count=count_transformation
+                 )
+             )
+         elif isinstance(child, (DPRuleset, HRuleset)):
+             count_ruleset += 1
+             list_rulesets.append(__generate_ruleset(child=child, count=count_ruleset))
+         elif isinstance(child, Operator):
+             count_udo += 1
+             list_udos.append(__generate_udo(child=child, count=count_udo))
+
+     references: Any = {}
+     if list_rulesets:
+         references["ruleset_schemes"] = [
+             RulesetScheme(
+                 items=list_rulesets,
+                 agency=agency_id,
+                 id="RS1",
+                 vtl_version="2.1",
+                 version=version,
+                 name=f"RulesetScheme {id}-RS",
+             )
+         ]
+     if list_udos:
+         references["user_defined_operator_schemes"] = [
+             UserDefinedOperatorScheme(
+                 items=list_udos,
+                 agency=agency_id,
+                 id="UDS1",
+                 vtl_version="2.1",
+                 version=version,
+                 name=f"UserDefinedOperatorScheme {id}-UDS",
+             )
+         ]
+
+     transformation_scheme = TransformationScheme(
+         items=list_transformation,
+         agency=agency_id,
+         id="TS1",
+         vtl_version="2.1",
+         version=version,
+         name=f"TransformationScheme {id}",
+         **references,
+     )
+
+     return transformation_scheme
+
+
+ def _check_script(script: Union[str, TransformationScheme, Path]) -> str:
+     """
+     Check if the TransformationScheme object is valid to generate a vtl script.
+     """
+     if not isinstance(script, (str, TransformationScheme, Path)):
+         raise SemanticError("0-1-1-1", format_=type(script).__name__)
+     if isinstance(script, TransformationScheme):
+         from pysdmx.toolkit.vtl import (
+             generate_vtl_script,
+         )
+
+         vtl_script = generate_vtl_script(script, model_validation=True)
+         return vtl_script
+     else:
+         return str(script)
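
Tying the new `_InternalApi` additions together: the sketch below round-trips a script through `ast_to_sdmx` into a pysdmx `TransformationScheme` and back to VTL via `_check_script`. `create_ast` is assumed from the engine's public API module, and the agency and scheme identifiers are illustrative.

```python
# Hedged sketch of the round trip enabled by the functions added above.
# `create_ast` is assumed to be exposed by vtlengine.API; identifiers
# ("MD", "TS_EXAMPLE") and the script itself are illustrative.
from vtlengine.API import create_ast
from vtlengine.API._InternalApi import _check_script, ast_to_sdmx

script = (
    "define operator double (x number) returns number is x * 2 end operator; "
    "DS_r <- DS_1 + 1;"
)

# VTL script -> AST -> pysdmx TransformationScheme: the Assignment becomes a
# non-persistent Transformation ("T1") and the user-defined operator a
# UserDefinedOperator ("UDO1") wrapped in a UserDefinedOperatorScheme ("UDS1").
ast = create_ast(script)
scheme = ast_to_sdmx(ast, agency_id="MD", id="TS_EXAMPLE", version="1.0")

# And back: _check_script regenerates a VTL script from the scheme through
# pysdmx.toolkit.vtl.generate_vtl_script (with model validation enabled).
vtl_script = _check_script(scheme)
print(vtl_script)
```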