vtlengine 1.1rc1__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vtlengine might be problematic; see the registry's advisory page for details.

@@ -1,18 +1,33 @@
1
1
  import json
2
2
  import os
3
3
  from pathlib import Path
4
- from typing import Any, Dict, List, Optional, Union
4
+ from typing import Any, Dict, List, Literal, Optional, Union
5
5
 
6
6
  import jsonschema
7
7
  import pandas as pd
8
+ from pysdmx.model.dataflow import Component as SDMXComponent
9
+ from pysdmx.model.dataflow import DataStructureDefinition, Schema
10
+ from pysdmx.model.dataflow import Role as SDMX_Role
11
+ from pysdmx.model.vtl import (
12
+ Ruleset,
13
+ RulesetScheme,
14
+ Transformation,
15
+ TransformationScheme,
16
+ UserDefinedOperator,
17
+ UserDefinedOperatorScheme,
18
+ )
8
19
 
20
+ from vtlengine import AST as AST
9
21
  from vtlengine.__extras_check import __check_s3_extra
10
- from vtlengine.AST import PersistentAssignment, Start
22
+ from vtlengine.AST import Assignment, DPRuleset, HRuleset, Operator, PersistentAssignment, Start
23
+ from vtlengine.AST.ASTString import ASTString
11
24
  from vtlengine.DataTypes import SCALAR_TYPES
12
25
  from vtlengine.Exceptions import InputValidationException, check_key
13
26
  from vtlengine.files.parser import _fill_dataset_empty_data, _validate_pandas
14
27
  from vtlengine.Model import (
15
- Component,
28
+ Component as VTL_Component,
29
+ )
30
+ from vtlengine.Model import (
16
31
  Dataset,
17
32
  ExternalRoutine,
18
33
  Role,
@@ -20,9 +35,11 @@ from vtlengine.Model import (
20
35
  Scalar,
21
36
  ValueDomain,
22
37
  )
38
+ from vtlengine.Utils import VTL_DTYPES_MAPPING, VTL_ROLE_MAPPING
23
39
 
24
40
  base_path = Path(__file__).parent
25
41
  schema_path = base_path / "data" / "schema"
42
+ sdmx_csv_path = base_path / "data" / "sdmx_csv"
26
43
  with open(schema_path / "json_schema_2.1.json", "r") as file:
27
44
  schema = json.load(file)
28
45
 
@@ -66,7 +83,7 @@ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
66
83
  else:
67
84
  component["nullable"] = False
68
85
 
69
- components[component["name"]] = Component(
86
+ components[component["name"]] = VTL_Component(
70
87
  name=component["name"],
71
88
  data_type=SCALAR_TYPES[component["data_type"]],
72
89
  role=Role(component["role"]),
@@ -77,7 +94,7 @@ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
77
94
  for component in dataset_json["DataStructure"]:
78
95
  check_key("data_type", SCALAR_TYPES.keys(), component["type"])
79
96
  check_key("role", Role_keys, component["role"])
80
- components[component["name"]] = Component(
97
+ components[component["name"]] = VTL_Component(
81
98
  name=component["name"],
82
99
  data_type=SCALAR_TYPES[component["type"]],
83
100
  role=Role(component["role"]),
@@ -169,7 +186,7 @@ def _load_datastructure_single(data_structure: Union[Dict[str, Any], Path]) -> D
169
186
 
170
187
 
171
188
  def load_datasets(
172
- data_structure: Union[Dict[str, Any], Path, List[Union[Dict[str, Any], Path]]],
189
+ data_structure: Union[Dict[str, Any], Path, List[Dict[str, Any]], List[Path]],
173
190
  ) -> Dict[str, Dataset]:
174
191
  """
175
192
  Loads multiple datasets.
@@ -393,3 +410,211 @@ def _check_output_folder(output_folder: Union[str, Path]) -> None:
393
410
  if output_folder.suffix != "":
394
411
  raise ValueError("Output folder must be a Path or S3 URI to a directory")
395
412
  os.mkdir(output_folder)
413
+
414
+
415
+ def to_vtl_json(dsd: Union[DataStructureDefinition, Schema], dataset_name: str) -> Dict[str, Any]:
416
+ """
417
+ Converts a pysdmx `DataStructureDefinition` or `Schema` into a VTL-compatible JSON
418
+ representation.
419
+
420
+ This function extracts and transforms the components (dimensions, measures, and attributes)
421
+ from the given SDMX data structure and maps them into a dictionary format that conforms
422
+ to the expected VTL data structure json schema.
423
+
424
+ Args:
425
+ dsd: An instance of `DataStructureDefinition` or `Schema` from the `pysdmx` model.
426
+ dataset_name: The name of the resulting VTL dataset.
427
+
428
+ Returns:
429
+ A dictionary representing the dataset in VTL format, with keys for dataset name and its
430
+ components, including their name, role, data type, and nullability.
431
+ """
432
+ components = []
433
+ NAME = "name"
434
+ ROLE = "role"
435
+ TYPE = "type"
436
+ NULLABLE = "nullable"
437
+
438
+ _components: List[SDMXComponent] = []
439
+ _components.extend(dsd.components.dimensions)
440
+ _components.extend(dsd.components.measures)
441
+ _components.extend(dsd.components.attributes)
442
+
443
+ for c in _components:
444
+ _type = VTL_DTYPES_MAPPING[c.dtype]
445
+ _nullability = c.role != SDMX_Role.DIMENSION
446
+ _role = VTL_ROLE_MAPPING[c.role]
447
+
448
+ component = {
449
+ NAME: c.id,
450
+ ROLE: _role,
451
+ TYPE: _type,
452
+ NULLABLE: _nullability,
453
+ }
454
+
455
+ components.append(component)
456
+
457
+ result = {"datasets": [{"name": dataset_name, "DataStructure": components}]}
458
+
459
+ return result
460
+
461
+
462
+ def __generate_transformation(
463
+ child: Union[Assignment, PersistentAssignment], is_persistent: bool, count: int
464
+ ) -> Transformation:
465
+ expression = ASTString().render(ast=child.right)
466
+ result = child.left.value # type: ignore[attr-defined]
467
+ return Transformation(
468
+ id=f"T{count}",
469
+ expression=expression,
470
+ is_persistent=is_persistent,
471
+ result=result,
472
+ name=f"Transformation {result}",
473
+ )
474
+
475
+
476
+ def __generate_udo(child: Operator, count: int) -> UserDefinedOperator:
477
+ operator_definition = ASTString().render(ast=child)
478
+ return UserDefinedOperator(
479
+ id=f"UDO{count}",
480
+ operator_definition=operator_definition,
481
+ name=f"UserDefinedOperator {child.op}",
482
+ )
483
+
484
+
485
+ def __generate_ruleset(child: Union[DPRuleset, HRuleset], count: int) -> Ruleset:
486
+ ruleset_definition = ASTString().render(ast=child)
487
+ ruleset_type: Literal["datapoint", "hierarchical"] = (
488
+ "datapoint" if isinstance(child, DPRuleset) else "hierarchical"
489
+ )
490
+ ruleset_scope: Literal["variable", "valuedomain"] = (
491
+ "variable" if child.signature_type == "variable" else "valuedomain"
492
+ )
493
+ return Ruleset(
494
+ id=f"R{count}",
495
+ ruleset_definition=ruleset_definition,
496
+ ruleset_type=ruleset_type,
497
+ ruleset_scope=ruleset_scope,
498
+ name=f"{ruleset_type.capitalize()} ruleset {child.name}",
499
+ )
500
+
501
+
502
+ def ast_to_sdmx(ast: AST.Start, agency_id: str, id: str, version: str) -> TransformationScheme:
503
+ """
504
+ Converts a vtl AST into an SDMX compatible `TransformationScheme` object, following
505
+ the pysdmx model.
506
+
507
+ This function iterates over the child nodes of the given AST and categorizes each into one of
508
+ the following types:
509
+ - `PersistentAssignment`: Represents a persistent transformation. These are added to the
510
+ transformation list with a persistence flag.
511
+ - `Assignment`: Represents a temporary (non-persistent) transformation. These are added to the
512
+ transformation list without the persistence flag
513
+ - `DPRuleset` or `HRuleset`: Represent validation rule sets.
514
+ These are collected and wrapped into a `RulesetScheme` object.
515
+ - `Operator`: Defines user-defined operators. These are collected
516
+ into a `UserDefinedOperatorScheme` object.
517
+
518
+ After parsing all AST elements:
519
+ - If any rulesets were found, a `RulesetScheme` is created and added to the references.
520
+ - If any user-defined operators were found, a `UserDefinedOperatorScheme` is created and added
521
+ to the references.
522
+ - A `TransformationScheme` object is constructed with all collected transformations and any
523
+ additional references.
524
+
525
+ Args:
526
+ ast: The root node of the vtl ast representing the set of
527
+ vtl expressions.
528
+ agency_id: The identifier of the agency defining the SDMX structure as a string.
529
+ id: The identifier of the transformation scheme as a string.
530
+ version: The version of the transformation scheme given as a string.
531
+
532
+ Returns:
533
+ TransformationScheme: A fully constructed transformation scheme that includes
534
+ transformations, and optionally rule sets and user-defined operator schemes,
535
+ suitable for SDMX.
536
+
537
+ """
538
+ list_transformation = []
539
+ list_udos = []
540
+ list_rulesets = []
541
+ count_transformation = 0
542
+ count_udo = 0
543
+ count_ruleset = 0
544
+
545
+ for child in ast.children:
546
+ if isinstance(child, PersistentAssignment):
547
+ count_transformation += 1
548
+ list_transformation.append(
549
+ __generate_transformation(
550
+ child=child, is_persistent=True, count=count_transformation
551
+ )
552
+ )
553
+ elif isinstance(child, Assignment):
554
+ count_transformation += 1
555
+ list_transformation.append(
556
+ __generate_transformation(
557
+ child=child, is_persistent=False, count=count_transformation
558
+ )
559
+ )
560
+ elif isinstance(child, (DPRuleset, HRuleset)):
561
+ count_ruleset += 1
562
+ list_rulesets.append(__generate_ruleset(child=child, count=count_ruleset))
563
+ elif isinstance(child, Operator):
564
+ count_udo += 1
565
+ list_udos.append(__generate_udo(child=child, count=count_udo))
566
+
567
+ references: Any = {}
568
+ if list_rulesets:
569
+ references["ruleset_schemes"] = [
570
+ RulesetScheme(
571
+ items=list_rulesets,
572
+ agency=agency_id,
573
+ id="RS1",
574
+ vtl_version="2.1",
575
+ version=version,
576
+ name=f"RulesetScheme {id}-RS",
577
+ )
578
+ ]
579
+ if list_udos:
580
+ references["user_defined_operator_schemes"] = [
581
+ UserDefinedOperatorScheme(
582
+ items=list_udos,
583
+ agency=agency_id,
584
+ id="UDS1",
585
+ vtl_version="2.1",
586
+ version=version,
587
+ name=f"UserDefinedOperatorScheme {id}-UDS",
588
+ )
589
+ ]
590
+
591
+ transformation_scheme = TransformationScheme(
592
+ items=list_transformation,
593
+ agency=agency_id,
594
+ id="TS1",
595
+ vtl_version="2.1",
596
+ version=version,
597
+ name=f"TransformationScheme {id}",
598
+ **references,
599
+ )
600
+
601
+ return transformation_scheme
602
+
603
+
604
+ def _check_script(script: Union[str, TransformationScheme, Path]) -> str:
605
+ """
606
+ Check if the TransformationScheme object is valid to generate a vtl script.
607
+ """
608
+ if not isinstance(script, (str, TransformationScheme, Path)):
609
+ raise Exception(
610
+ "Invalid script format. Input must be a string, TransformationScheme or Path object"
611
+ )
612
+ if isinstance(script, TransformationScheme):
613
+ from pysdmx.toolkit.vtl.generate_vtl_script import (
614
+ generate_vtl_script,
615
+ )
616
+
617
+ vtl_script = generate_vtl_script(script, model_validation=True)
618
+ return vtl_script
619
+ else:
620
+ return str(script)