vtlengine 1.1rc2__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vtlengine might be problematic. Click here for more details.
- vtlengine/API/_InternalApi.py +231 -6
- vtlengine/API/__init__.py +256 -65
- vtlengine/AST/ASTComment.py +56 -0
- vtlengine/AST/ASTConstructor.py +71 -18
- vtlengine/AST/ASTConstructorModules/Expr.py +191 -72
- vtlengine/AST/ASTConstructorModules/ExprComponents.py +81 -38
- vtlengine/AST/ASTConstructorModules/Terminals.py +76 -31
- vtlengine/AST/ASTConstructorModules/__init__.py +50 -0
- vtlengine/AST/ASTEncoders.py +4 -0
- vtlengine/AST/ASTString.py +622 -0
- vtlengine/AST/ASTTemplate.py +28 -2
- vtlengine/AST/DAG/__init__.py +10 -1
- vtlengine/AST/__init__.py +127 -14
- vtlengine/Exceptions/messages.py +9 -0
- vtlengine/Interpreter/__init__.py +53 -8
- vtlengine/Model/__init__.py +9 -4
- vtlengine/Operators/Aggregation.py +7 -5
- vtlengine/Operators/Analytic.py +16 -11
- vtlengine/Operators/Conditional.py +20 -5
- vtlengine/Operators/Time.py +11 -10
- vtlengine/Utils/__init__.py +49 -0
- vtlengine/__init__.py +4 -2
- vtlengine/files/parser/__init__.py +16 -26
- vtlengine/files/parser/_rfc_dialect.py +1 -1
- vtlengine/py.typed +0 -0
- vtlengine-1.1.1.dist-info/METADATA +92 -0
- {vtlengine-1.1rc2.dist-info → vtlengine-1.1.1.dist-info}/RECORD +29 -26
- {vtlengine-1.1rc2.dist-info → vtlengine-1.1.1.dist-info}/WHEEL +1 -1
- vtlengine-1.1rc2.dist-info/METADATA +0 -248
- {vtlengine-1.1rc2.dist-info → vtlengine-1.1.1.dist-info}/LICENSE.md +0 -0
vtlengine/Model/__init__.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import inspect
|
|
1
2
|
import json
|
|
2
3
|
from collections import Counter
|
|
3
4
|
from dataclasses import dataclass
|
|
@@ -128,10 +129,14 @@ class Component:
|
|
|
128
129
|
)
|
|
129
130
|
|
|
130
131
|
def to_dict(self) -> Dict[str, Any]:
|
|
132
|
+
data_type = self.data_type
|
|
133
|
+
if not inspect.isclass(self.data_type):
|
|
134
|
+
data_type = self.data_type.__class__ # type: ignore[assignment]
|
|
131
135
|
return {
|
|
132
136
|
"name": self.name,
|
|
133
|
-
"data_type": DataTypes.SCALAR_TYPES_CLASS_REVERSE[
|
|
134
|
-
|
|
137
|
+
"data_type": DataTypes.SCALAR_TYPES_CLASS_REVERSE[data_type],
|
|
138
|
+
# Need to check here for NoneType as UDO argument has it
|
|
139
|
+
"role": self.role.value if self.role is not None else None, # type: ignore[redundant-expr]
|
|
135
140
|
"nullable": self.nullable,
|
|
136
141
|
}
|
|
137
142
|
|
|
@@ -229,11 +234,11 @@ class Dataset:
|
|
|
229
234
|
self.data[comp.name] = self.data[comp.name].astype(str)
|
|
230
235
|
other.data[comp.name] = other.data[comp.name].astype(str)
|
|
231
236
|
self.data[comp.name] = self.data[comp.name].map(
|
|
232
|
-
lambda x: str(TimePeriodHandler(x)) if x != "" else "",
|
|
237
|
+
lambda x: str(TimePeriodHandler(str(x))) if x != "" else "",
|
|
233
238
|
na_action="ignore",
|
|
234
239
|
)
|
|
235
240
|
other.data[comp.name] = other.data[comp.name].map(
|
|
236
|
-
lambda x: str(TimePeriodHandler(x)) if x != "" else "",
|
|
241
|
+
lambda x: str(TimePeriodHandler(str(x))) if x != "" else "",
|
|
237
242
|
na_action="ignore",
|
|
238
243
|
)
|
|
239
244
|
elif type_name in ["Integer", "Number"]:
|
|
@@ -78,7 +78,7 @@ class Aggregation(Operator.Unary):
|
|
|
78
78
|
data[measure.name] = (
|
|
79
79
|
data[measure.name]
|
|
80
80
|
.astype(object)
|
|
81
|
-
.map(lambda x: TimePeriodHandler(x), na_action="ignore")
|
|
81
|
+
.map(lambda x: TimePeriodHandler(str(x)), na_action="ignore")
|
|
82
82
|
)
|
|
83
83
|
else:
|
|
84
84
|
data[measure.name] = data[measure.name].map(
|
|
@@ -90,7 +90,7 @@ class Aggregation(Operator.Unary):
|
|
|
90
90
|
data[measure.name]
|
|
91
91
|
.astype(object)
|
|
92
92
|
.map(
|
|
93
|
-
lambda x: TimeIntervalHandler.from_iso_format(x),
|
|
93
|
+
lambda x: TimeIntervalHandler.from_iso_format(str(x)),
|
|
94
94
|
na_action="ignore",
|
|
95
95
|
)
|
|
96
96
|
)
|
|
@@ -103,11 +103,13 @@ class Aggregation(Operator.Unary):
|
|
|
103
103
|
elif measure.data_type == Duration:
|
|
104
104
|
if mode == "input":
|
|
105
105
|
data[measure.name] = data[measure.name].map(
|
|
106
|
-
lambda x: PERIOD_IND_MAPPING[x],
|
|
106
|
+
lambda x: PERIOD_IND_MAPPING[x], # type: ignore[index]
|
|
107
|
+
na_action="ignore",
|
|
107
108
|
)
|
|
108
109
|
else:
|
|
109
110
|
data[measure.name] = data[measure.name].map(
|
|
110
|
-
lambda x: PERIOD_IND_MAPPING_REVERSE[x],
|
|
111
|
+
lambda x: PERIOD_IND_MAPPING_REVERSE[x], # type: ignore[index]
|
|
112
|
+
na_action="ignore",
|
|
111
113
|
)
|
|
112
114
|
elif measure.data_type == Boolean:
|
|
113
115
|
if mode == "result":
|
|
@@ -208,7 +210,7 @@ class Aggregation(Operator.Unary):
|
|
|
208
210
|
e = f'"{e}"'
|
|
209
211
|
if cls.type_to_check is not None and cls.op != COUNT:
|
|
210
212
|
functions += (
|
|
211
|
-
f"{cls.py_op}(CAST({e} AS
|
|
213
|
+
f"{cls.py_op}(CAST({e} AS DOUBLE)) AS {e}, " # Count can only be one here
|
|
212
214
|
)
|
|
213
215
|
elif cls.op == COUNT:
|
|
214
216
|
functions += f"{cls.py_op}({e}) AS int_var, "
|
vtlengine/Operators/Analytic.py
CHANGED
|
@@ -189,21 +189,26 @@ class Analytic(Operator.Unary):
|
|
|
189
189
|
if window is not None:
|
|
190
190
|
mode = "ROWS" if window.type_ == "data" else "RANGE"
|
|
191
191
|
start_mode = (
|
|
192
|
-
window.start_mode
|
|
193
|
-
if window.
|
|
192
|
+
window.start_mode.upper()
|
|
193
|
+
if (isinstance(window.start, int) and window.start != 0)
|
|
194
|
+
or (isinstance(window.start, str) and window.start == "unbounded")
|
|
194
195
|
else ""
|
|
195
196
|
)
|
|
196
197
|
stop_mode = (
|
|
197
|
-
window.stop_mode
|
|
198
|
-
if window.
|
|
198
|
+
window.stop_mode.upper()
|
|
199
|
+
if (isinstance(window.stop, int) and window.stop != 0)
|
|
200
|
+
or (isinstance(window.stop, str) and window.stop == "unbounded")
|
|
199
201
|
else ""
|
|
200
202
|
)
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
203
|
+
start = (
|
|
204
|
+
"UNBOUNDED"
|
|
205
|
+
if window.start == "unbounded" or window.start == -1
|
|
206
|
+
else str(window.start)
|
|
207
|
+
)
|
|
208
|
+
stop = (
|
|
209
|
+
"CURRENT ROW" if window.stop == "current" or window.stop == 0 else str(window.stop)
|
|
210
|
+
)
|
|
211
|
+
window_str = f"{mode} BETWEEN {start} {start_mode} AND {stop} {stop_mode}"
|
|
207
212
|
|
|
208
213
|
# Partitioning
|
|
209
214
|
partition = "PARTITION BY " + ", ".join(partitioning) if len(partitioning) > 0 else ""
|
|
@@ -224,7 +229,7 @@ class Analytic(Operator.Unary):
|
|
|
224
229
|
if cls.op == RANK:
|
|
225
230
|
measure_query = f"{cls.sql_op}()"
|
|
226
231
|
elif cls.op == RATIO_TO_REPORT:
|
|
227
|
-
measure_query = f"CAST({measure} AS
|
|
232
|
+
measure_query = f"CAST({measure} AS DOUBLE) / SUM(CAST({measure} AS DOUBLE))"
|
|
228
233
|
elif cls.op in [LAG, LEAD]:
|
|
229
234
|
measure_query = f"{cls.sql_op}({measure}, {','.join(map(str, params or []))})"
|
|
230
235
|
else:
|
|
@@ -238,7 +238,7 @@ class Nvl(Binary):
|
|
|
238
238
|
result = cls.validate(left, right)
|
|
239
239
|
|
|
240
240
|
if isinstance(left, Scalar) and isinstance(result, Scalar):
|
|
241
|
-
if
|
|
241
|
+
if left.data_type is Null:
|
|
242
242
|
result.value = right.value
|
|
243
243
|
else:
|
|
244
244
|
result.value = left.value
|
|
@@ -308,8 +308,23 @@ class Case(Operator):
|
|
|
308
308
|
) -> Union[Scalar, DataComponent, Dataset]:
|
|
309
309
|
result = cls.validate(conditions, thenOps, elseOp)
|
|
310
310
|
for condition in conditions:
|
|
311
|
-
if isinstance(condition,
|
|
311
|
+
if isinstance(condition, Dataset) and condition.data is not None:
|
|
312
312
|
condition.data.fillna(False, inplace=True)
|
|
313
|
+
condition_measure = condition.get_measures_names()[0]
|
|
314
|
+
if condition.data[condition_measure].dtype != bool:
|
|
315
|
+
condition.data[condition_measure] = condition.data[condition_measure].astype(
|
|
316
|
+
bool
|
|
317
|
+
)
|
|
318
|
+
elif (
|
|
319
|
+
isinstance(
|
|
320
|
+
condition,
|
|
321
|
+
DataComponent,
|
|
322
|
+
)
|
|
323
|
+
and condition.data is not None
|
|
324
|
+
):
|
|
325
|
+
condition.data.fillna(False, inplace=True)
|
|
326
|
+
if condition.data.dtype != bool:
|
|
327
|
+
condition.data = condition.data.astype(bool)
|
|
313
328
|
elif isinstance(condition, Scalar) and condition.value is None:
|
|
314
329
|
condition.value = False
|
|
315
330
|
|
|
@@ -324,9 +339,9 @@ class Case(Operator):
|
|
|
324
339
|
|
|
325
340
|
for i, condition in enumerate(conditions):
|
|
326
341
|
value = thenOps[i].value if isinstance(thenOps[i], Scalar) else thenOps[i].data
|
|
327
|
-
result.data = np.where(
|
|
342
|
+
result.data = np.where(
|
|
328
343
|
condition.data.notna(),
|
|
329
|
-
np.where(condition.data, value, result.data),
|
|
344
|
+
np.where(condition.data, value, result.data),
|
|
330
345
|
result.data,
|
|
331
346
|
)
|
|
332
347
|
|
|
@@ -366,7 +381,7 @@ class Case(Operator):
|
|
|
366
381
|
]
|
|
367
382
|
)
|
|
368
383
|
|
|
369
|
-
result.data.loc[condition_mask_else, columns] = (
|
|
384
|
+
result.data.loc[condition_mask_else, columns] = ( # type: ignore[index, unused-ignore]
|
|
370
385
|
elseOp.value
|
|
371
386
|
if isinstance(elseOp, Scalar)
|
|
372
387
|
else elseOp.data.loc[condition_mask_else, columns]
|
vtlengine/Operators/Time.py
CHANGED
|
@@ -57,14 +57,17 @@ class Time(Operators.Operator):
|
|
|
57
57
|
op = FLOW_TO_STOCK
|
|
58
58
|
|
|
59
59
|
@classmethod
|
|
60
|
-
def _get_time_id(cls, operand: Dataset) ->
|
|
60
|
+
def _get_time_id(cls, operand: Dataset) -> str:
|
|
61
61
|
reference_id = None
|
|
62
|
+
identifiers = operand.get_identifiers()
|
|
63
|
+
if len(identifiers) == 0:
|
|
64
|
+
raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
|
|
62
65
|
for id in operand.get_identifiers():
|
|
63
66
|
if id.data_type in cls.TIME_DATA_TYPES:
|
|
64
67
|
if reference_id is not None:
|
|
65
68
|
raise SemanticError("1-1-19-8", op=cls.op, comp_type="time dataset")
|
|
66
69
|
reference_id = id.name
|
|
67
|
-
return reference_id
|
|
70
|
+
return str(reference_id)
|
|
68
71
|
|
|
69
72
|
@classmethod
|
|
70
73
|
def sort_by_time(cls, operand: Dataset) -> Optional[pd.DataFrame]:
|
|
@@ -182,7 +185,7 @@ class Period_indicator(Unary):
|
|
|
182
185
|
def validate(cls, operand: Any) -> Any:
|
|
183
186
|
if isinstance(operand, Dataset):
|
|
184
187
|
time_id = cls._get_time_id(operand)
|
|
185
|
-
if
|
|
188
|
+
if operand.components[time_id].data_type != TimePeriod:
|
|
186
189
|
raise SemanticError("1-1-19-8", op=cls.op, comp_type="time period dataset")
|
|
187
190
|
result_components = {
|
|
188
191
|
comp.name: comp
|
|
@@ -223,7 +226,7 @@ class Period_indicator(Unary):
|
|
|
223
226
|
if (operand.data is not None)
|
|
224
227
|
else pd.Series()
|
|
225
228
|
)
|
|
226
|
-
period_series: Any = result.data[cls.time_id].map(cls._get_period)
|
|
229
|
+
period_series: Any = result.data[cls.time_id].map(cls._get_period)
|
|
227
230
|
result.data["duration_var"] = period_series
|
|
228
231
|
return result
|
|
229
232
|
|
|
@@ -378,7 +381,7 @@ class Fill_time_series(Binary):
|
|
|
378
381
|
)
|
|
379
382
|
|
|
380
383
|
filled_data = pd.concat(filled_data, ignore_index=True)
|
|
381
|
-
combined_data = pd.concat([filled_data, data], ignore_index=True)
|
|
384
|
+
combined_data = pd.concat([filled_data, data], ignore_index=True)
|
|
382
385
|
if len(cls.periods) == 1 and cls.periods[0] == "A":
|
|
383
386
|
combined_data[cls.time_id] = combined_data[cls.time_id].astype(int)
|
|
384
387
|
else:
|
|
@@ -456,7 +459,7 @@ class Fill_time_series(Binary):
|
|
|
456
459
|
|
|
457
460
|
filled_data = pd.concat(filled_data, ignore_index=True)
|
|
458
461
|
filled_data[cls.time_id] = filled_data[cls.time_id].dt.strftime(date_format)
|
|
459
|
-
combined_data = pd.concat([filled_data, data], ignore_index=True)
|
|
462
|
+
combined_data = pd.concat([filled_data, data], ignore_index=True)
|
|
460
463
|
combined_data[cls.time_id] = combined_data[cls.time_id].astype(str)
|
|
461
464
|
return combined_data.sort_values(by=cls.other_ids + [cls.time_id])
|
|
462
465
|
|
|
@@ -544,9 +547,7 @@ class Time_Shift(Binary):
|
|
|
544
547
|
shift_value = int(shift_value.value)
|
|
545
548
|
cls.time_id = cls._get_time_id(result)
|
|
546
549
|
|
|
547
|
-
data_type: Any =
|
|
548
|
-
result.components[cls.time_id].data_type if isinstance(cls.time_id, str) else None
|
|
549
|
-
)
|
|
550
|
+
data_type: Any = result.components[cls.time_id].data_type
|
|
550
551
|
|
|
551
552
|
if data_type == Date:
|
|
552
553
|
freq = cls.find_min_frequency(
|
|
@@ -966,7 +967,7 @@ class Date_Add(Parametrized):
|
|
|
966
967
|
for measure in operand.get_measures():
|
|
967
968
|
if measure.data_type in [Date, TimePeriod]:
|
|
968
969
|
result.data[measure.name] = result.data[measure.name].map(
|
|
969
|
-
lambda x: cls.py_op(x, shift, period, measure.data_type == TimePeriod),
|
|
970
|
+
lambda x: cls.py_op(str(x), shift, period, measure.data_type == TimePeriod),
|
|
970
971
|
na_action="ignore",
|
|
971
972
|
)
|
|
972
973
|
measure.data_type = Date
|
vtlengine/Utils/__init__.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from typing import Any, Dict
|
|
2
2
|
|
|
3
|
+
from pysdmx.model.dataflow import Role
|
|
4
|
+
|
|
3
5
|
from vtlengine.AST.Grammar.tokens import (
|
|
4
6
|
ABS,
|
|
5
7
|
AGGREGATE,
|
|
@@ -428,3 +430,50 @@ HA_UNARY_MAPPING = {
|
|
|
428
430
|
PLUS: HRUnPlus,
|
|
429
431
|
MINUS: HRUnMinus,
|
|
430
432
|
}
|
|
433
|
+
VTL_DTYPES_MAPPING = {
|
|
434
|
+
"String": "String",
|
|
435
|
+
"Alpha": "String",
|
|
436
|
+
"AlphaNumeric": "String",
|
|
437
|
+
"Numeric": "String",
|
|
438
|
+
"BigInteger": "Integer",
|
|
439
|
+
"Integer": "Integer",
|
|
440
|
+
"Long": "Integer",
|
|
441
|
+
"Short": "Integer",
|
|
442
|
+
"Decimal": "Number",
|
|
443
|
+
"Float": "Number",
|
|
444
|
+
"Double": "Number",
|
|
445
|
+
"Boolean": "Boolean",
|
|
446
|
+
"URI": "String",
|
|
447
|
+
"Count": "Integer",
|
|
448
|
+
"InclusiveValueRange": "Number",
|
|
449
|
+
"ExclusiveValueRange": "Number",
|
|
450
|
+
"Incremental": "Number",
|
|
451
|
+
"ObservationalTimePeriod": "Time_Period",
|
|
452
|
+
"StandardTimePeriod": "Time_Period",
|
|
453
|
+
"BasicTimePeriod": "Date",
|
|
454
|
+
"GregorianTimePeriod": "Date",
|
|
455
|
+
"GregorianYear": "Date",
|
|
456
|
+
"GregorianYearMonth": "Date",
|
|
457
|
+
"GregorianMonth": "Date",
|
|
458
|
+
"GregorianDay": "Date",
|
|
459
|
+
"ReportingTimePeriod": "Time_Period",
|
|
460
|
+
"ReportingYear": "Time_Period",
|
|
461
|
+
"ReportingSemester": "Time_Period",
|
|
462
|
+
"ReportingTrimester": "Time_Period",
|
|
463
|
+
"ReportingQuarter": "Time_Period",
|
|
464
|
+
"ReportingMonth": "Time_Period",
|
|
465
|
+
"ReportingWeek": "Time_Period",
|
|
466
|
+
"ReportingDay": "Time_Period",
|
|
467
|
+
"DateTime": "Date",
|
|
468
|
+
"TimeRange": "Time",
|
|
469
|
+
"Month": "String",
|
|
470
|
+
"MonthDay": "String",
|
|
471
|
+
"Day": "String",
|
|
472
|
+
"Time": "String",
|
|
473
|
+
"Duration": "Duration",
|
|
474
|
+
}
|
|
475
|
+
VTL_ROLE_MAPPING = {
|
|
476
|
+
Role.DIMENSION: "Identifier",
|
|
477
|
+
Role.MEASURE: "Measure",
|
|
478
|
+
Role.ATTRIBUTE: "Attribute",
|
|
479
|
+
}
|
vtlengine/__init__.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
from vtlengine.API import run, semantic_analysis
|
|
1
|
+
from vtlengine.API import generate_sdmx, prettify, run, run_sdmx, semantic_analysis
|
|
2
2
|
|
|
3
|
-
__all__ = ["semantic_analysis", "run"]
|
|
3
|
+
__all__ = ["semantic_analysis", "run", "generate_sdmx", "run_sdmx", "prettify"]
|
|
4
|
+
|
|
5
|
+
__version__ = "1.1.1"
|
|
@@ -42,14 +42,9 @@ def _validate_csv_path(components: Dict[str, Component], csv_path: Path) -> None
|
|
|
42
42
|
raise Exception(f"Path {csv_path} is not a file.")
|
|
43
43
|
register_rfc()
|
|
44
44
|
try:
|
|
45
|
-
with open(csv_path, "r") as f:
|
|
45
|
+
with open(csv_path, "r", errors="replace", encoding="utf-8") as f:
|
|
46
46
|
reader = DictReader(f, dialect="rfc")
|
|
47
47
|
csv_columns = reader.fieldnames
|
|
48
|
-
|
|
49
|
-
except UnicodeDecodeError as error:
|
|
50
|
-
# https://coderwall.com/p/stzy9w/raising-unicodeencodeerror-and-unicodedecodeerror-
|
|
51
|
-
# manually-for-testing-purposes
|
|
52
|
-
raise InputValidationException("0-1-2-5", file=csv_path.name) from error
|
|
53
48
|
except InputValidationException as ie:
|
|
54
49
|
raise InputValidationException("{}".format(str(ie))) from None
|
|
55
50
|
except Exception as e:
|
|
@@ -110,21 +105,16 @@ def _sanitize_pandas_columns(
|
|
|
110
105
|
|
|
111
106
|
|
|
112
107
|
def _pandas_load_csv(components: Dict[str, Component], csv_path: Union[str, Path]) -> pd.DataFrame:
|
|
113
|
-
obj_dtypes = {comp_name:
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
except UnicodeDecodeError:
|
|
124
|
-
if isinstance(csv_path, Path):
|
|
125
|
-
raise InputValidationException(code="0-1-2-5", file=csv_path.name)
|
|
126
|
-
else:
|
|
127
|
-
raise InputValidationException(code="0-1-2-5", file=csv_path)
|
|
108
|
+
obj_dtypes = {comp_name: object for comp_name, comp in components.items()}
|
|
109
|
+
|
|
110
|
+
data = pd.read_csv(
|
|
111
|
+
csv_path,
|
|
112
|
+
dtype=obj_dtypes,
|
|
113
|
+
engine="c",
|
|
114
|
+
keep_default_na=False,
|
|
115
|
+
na_values=[""],
|
|
116
|
+
encoding_errors="replace",
|
|
117
|
+
)
|
|
128
118
|
|
|
129
119
|
return _sanitize_pandas_columns(components, csv_path, data)
|
|
130
120
|
|
|
@@ -170,13 +160,13 @@ def _validate_pandas(
|
|
|
170
160
|
)
|
|
171
161
|
elif comp.data_type == Integer:
|
|
172
162
|
data[comp_name] = data[comp_name].map(
|
|
173
|
-
lambda x: Integer.cast(float(x)), na_action="ignore"
|
|
163
|
+
lambda x: Integer.cast(float(str(x))), na_action="ignore"
|
|
174
164
|
)
|
|
175
165
|
elif comp.data_type == Number:
|
|
176
|
-
data[comp_name] = data[comp_name].map(lambda x: float(x), na_action="ignore")
|
|
166
|
+
data[comp_name] = data[comp_name].map(lambda x: float((str(x))), na_action="ignore")
|
|
177
167
|
elif comp.data_type == Boolean:
|
|
178
168
|
data[comp_name] = data[comp_name].map(
|
|
179
|
-
lambda x: _parse_boolean(x), na_action="ignore"
|
|
169
|
+
lambda x: _parse_boolean(str(x)), na_action="ignore"
|
|
180
170
|
)
|
|
181
171
|
elif comp.data_type == Duration:
|
|
182
172
|
values_correct = (
|
|
@@ -192,7 +182,7 @@ def _validate_pandas(
|
|
|
192
182
|
values_correct = (
|
|
193
183
|
data[comp_name]
|
|
194
184
|
.map(
|
|
195
|
-
lambda x: x.replace(" ", "") in PERIOD_IND_MAPPING,
|
|
185
|
+
lambda x: x.replace(" ", "") in PERIOD_IND_MAPPING, # type: ignore[union-attr]
|
|
196
186
|
na_action="ignore",
|
|
197
187
|
)
|
|
198
188
|
.all()
|
|
@@ -207,7 +197,7 @@ def _validate_pandas(
|
|
|
207
197
|
data[comp_name] = data[comp_name].map(
|
|
208
198
|
lambda x: str(x).replace('"', ""), na_action="ignore"
|
|
209
199
|
)
|
|
210
|
-
data[comp_name] = data[comp_name].astype(
|
|
200
|
+
data[comp_name] = data[comp_name].astype(object, errors="raise")
|
|
211
201
|
|
|
212
202
|
except ValueError:
|
|
213
203
|
str_comp = SCALAR_TYPES_CLASS_REVERSE[comp.data_type] if comp else "Null"
|
vtlengine/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: vtlengine
|
|
3
|
+
Version: 1.1.1
|
|
4
|
+
Summary: Run and Validate VTL Scripts
|
|
5
|
+
License: AGPL-3.0
|
|
6
|
+
Keywords: vtl,sdmx,vtlengine,Validation and Transformation Language
|
|
7
|
+
Author: MeaningfulData
|
|
8
|
+
Author-email: info@meaningfuldata.eu
|
|
9
|
+
Maintainer: Francisco Javier Hernandez del Caño
|
|
10
|
+
Maintainer-email: javier.hernandez@meaningfuldata.eu
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Information Technology
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: Typing :: Typed
|
|
17
|
+
Provides-Extra: all
|
|
18
|
+
Provides-Extra: s3
|
|
19
|
+
Requires-Dist: antlr4-python3-runtime (>=4.9.2,<4.10)
|
|
20
|
+
Requires-Dist: duckdb (>=1.1,<1.2)
|
|
21
|
+
Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "all"
|
|
22
|
+
Requires-Dist: fsspec (>=2022.11.0,<2023.0) ; extra == "s3"
|
|
23
|
+
Requires-Dist: jsonschema (>=3.2.0,<5.0)
|
|
24
|
+
Requires-Dist: networkx (>=2.8,<3.0)
|
|
25
|
+
Requires-Dist: numpy (>=1.23.2,<2) ; python_version < "3.13"
|
|
26
|
+
Requires-Dist: numpy (>=2.1.0) ; python_version >= "3.13"
|
|
27
|
+
Requires-Dist: pandas (>=2.1.4,<3.0)
|
|
28
|
+
Requires-Dist: pysdmx[xml] (>=1.3.0,<2.0)
|
|
29
|
+
Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "all"
|
|
30
|
+
Requires-Dist: s3fs (>=2022.11.0,<2023.0) ; extra == "s3"
|
|
31
|
+
Requires-Dist: sqlglot (>=22.2.0,<23.0)
|
|
32
|
+
Project-URL: Authors, https://github.com/Meaningful-Data/vtlengine/graphs/contributors
|
|
33
|
+
Project-URL: Documentation, https://docs.vtlengine.meaningfuldata.eu
|
|
34
|
+
Project-URL: IssueTracker, https://github.com/Meaningful-Data/vtlengine/issues
|
|
35
|
+
Project-URL: MeaningfulData, https://www.meaningfuldata.eu/
|
|
36
|
+
Project-URL: Repository, https://github.com/Meaningful-Data/vtlengine
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# VTL Engine
|
|
40
|
+
|
|
41
|
+
| | |
|
|
42
|
+
|--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|
43
|
+
| Testing | [](https://github.com/Meaningful-Data/vtlengine/actions/workflows/testing.yml) |
|
|
44
|
+
| Package | [](https://pypi.org/project/vtlengine/) |
|
|
45
|
+
| License | [](https://github.com/Meaningful-Data/vtlengine/blob/main/LICENSE.md) |
|
|
46
|
+
| Mentioned in | [](http://www.awesomeofficialstatistics.org) |
|
|
47
|
+
|
|
48
|
+
## Introduction
|
|
49
|
+
|
|
50
|
+
The VTL Engine is a Python library that allows you to validate, format and execute VTL scripts.
|
|
51
|
+
|
|
52
|
+
It is a Python-based library around
|
|
53
|
+
the [VTL Language 2.1](https://sdmx-twg.github.io/vtl/2.1/html/index.html).
|
|
54
|
+
|
|
55
|
+
## Useful Links
|
|
56
|
+
|
|
57
|
+
- [MeaningfulData: who we are](https://www.meaningfuldata.eu)
|
|
58
|
+
- [Documentation](https://docs.vtlengine.meaningfuldata.eu)
|
|
59
|
+
- [Source Code](https://github.com/Meaningful-Data/vtlengine)
|
|
60
|
+
- [Bug Tracker](https://github.com/Meaningful-Data/vtlengine/issues?q=is%3Aopen+is%3Aissue+label%3Abug)
|
|
61
|
+
- [New features Tracker](https://github.com/Meaningful-Data/vtlengine/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement)
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
### Requirements
|
|
66
|
+
|
|
67
|
+
The VTL Engine requires Python 3.9 or higher.
|
|
68
|
+
|
|
69
|
+
### Install with pip
|
|
70
|
+
|
|
71
|
+
To install the VTL Engine on any Operating System, you can use pip:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
|
|
75
|
+
pip install vtlengine
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
*Note: it is recommended to install the VTL Engine in a virtual environment.*
|
|
80
|
+
|
|
81
|
+
### S3 extra
|
|
82
|
+
|
|
83
|
+
If you want to use the S3 functionality, you can install the VTL Engine with the `s3` extra:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install vtlengine[s3]
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Documentation
|
|
90
|
+
|
|
91
|
+
The documentation for the VTL Engine is available
|
|
92
|
+
at [docs.vtlengine.meaningfuldata.eu](https://docs.vtlengine.meaningfuldata.eu).
|
|
@@ -1,16 +1,18 @@
|
|
|
1
|
-
vtlengine/API/_InternalApi.py,sha256=
|
|
2
|
-
vtlengine/API/__init__.py,sha256=
|
|
1
|
+
vtlengine/API/_InternalApi.py,sha256=I4Y11qx66ru9P77TySHtN9ZfJiOiRRcNhzdJVLdSOok,22938
|
|
2
|
+
vtlengine/API/__init__.py,sha256=IiJZWSlHpUWq73Qv1_V-Tirim-ZnpF3xexFtW1Psyx8,17866
|
|
3
3
|
vtlengine/API/data/schema/json_schema_2.1.json,sha256=v3-C0Xnq8qScJSPAtLgb3rjKMrd3nz-bIxgZdTSEUiU,4336
|
|
4
|
-
vtlengine/AST/
|
|
5
|
-
vtlengine/AST/
|
|
6
|
-
vtlengine/AST/ASTConstructorModules/
|
|
7
|
-
vtlengine/AST/ASTConstructorModules/
|
|
8
|
-
vtlengine/AST/ASTConstructorModules/
|
|
4
|
+
vtlengine/AST/ASTComment.py,sha256=bAJW7aaqBXU2LqMtRvL_XOttdl1AFZufa15vmQdvNlY,1667
|
|
5
|
+
vtlengine/AST/ASTConstructor.py,sha256=X55I98BKG1ItyGIDObF9ALVfCcWnU-0wwCWJsiPILkg,21488
|
|
6
|
+
vtlengine/AST/ASTConstructorModules/Expr.py,sha256=aCL3uuQF0BJIels6rTckL8FAAykzImYb3AESs7umFcY,70066
|
|
7
|
+
vtlengine/AST/ASTConstructorModules/ExprComponents.py,sha256=2Ft4e5w2NtbfaqSNW8I9qSpG9iUaPIfdug7yYWo2gqE,38553
|
|
8
|
+
vtlengine/AST/ASTConstructorModules/Terminals.py,sha256=7zWDx_SFcbnL35G7Y0qZwl-lLEsfqReyzBX0UxwTCOk,27054
|
|
9
|
+
vtlengine/AST/ASTConstructorModules/__init__.py,sha256=J6g6NhJD8j0Ek1YmpethxRiFdjhLxUTM0mc3NHRFLlM,1879
|
|
9
10
|
vtlengine/AST/ASTDataExchange.py,sha256=kPSz21DGbEv-2bZowObseqf2d2_iQj1VnrqWuD9ZwtA,140
|
|
10
|
-
vtlengine/AST/ASTEncoders.py,sha256
|
|
11
|
-
vtlengine/AST/
|
|
11
|
+
vtlengine/AST/ASTEncoders.py,sha256=-Ar6a0GqMdJZK4CtZ1pUpIeGv57oSdN5qy3-aF0Zt9c,948
|
|
12
|
+
vtlengine/AST/ASTString.py,sha256=mFZzkT5XO2p21ptt7nv3iBefJOcNsvuoWqwqaxfxMOc,25936
|
|
13
|
+
vtlengine/AST/ASTTemplate.py,sha256=qUkz0AE1ay3gFrCidzhJAqxRnZR8nj98DOKAW2rXoso,12961
|
|
12
14
|
vtlengine/AST/ASTVisitor.py,sha256=3QQTudBpbR4pPQdH7y07EgwuzhoGzNQ59qox8R-E3fM,500
|
|
13
|
-
vtlengine/AST/DAG/__init__.py,sha256=
|
|
15
|
+
vtlengine/AST/DAG/__init__.py,sha256=ViL1vfLOCU28Yx8cOMt8aIvguSrzYYTb9qPhAwoExwY,15074
|
|
14
16
|
vtlengine/AST/DAG/_words.py,sha256=lEuBQ_w-KoKGna-x3gFGfbX1KP4Ez5EgdomH2LOeodk,170
|
|
15
17
|
vtlengine/AST/Grammar/Vtl.g4,sha256=86bBWjQLCHZSuB5iLIk0JZRgMyMg0n7xbU8qzot2cIE,26313
|
|
16
18
|
vtlengine/AST/Grammar/VtlTokens.g4,sha256=SwDR_59U25APqslczFcvTUiPoH7bC6kGaH2GkJ3kYzA,9972
|
|
@@ -19,21 +21,21 @@ vtlengine/AST/Grammar/lexer.py,sha256=ncoPevKkUpGyYx5mVKcKjocVhFoKSdu-5NSQDPY2V3
|
|
|
19
21
|
vtlengine/AST/Grammar/parser.py,sha256=ISi5OWmPbLitMp-8fg-wa1-475TfKZWK98jXjyOLi-8,634355
|
|
20
22
|
vtlengine/AST/Grammar/tokens.py,sha256=YF7tO0nF2zYC-VaBAJLyc6VitM72CvYfFQpoPDGCMzo,3139
|
|
21
23
|
vtlengine/AST/VtlVisitor.py,sha256=NJfXJVP6wNmasJmPLlojFqm9R5VSamOAKg_w7BMrhac,35332
|
|
22
|
-
vtlengine/AST/__init__.py,sha256=
|
|
24
|
+
vtlengine/AST/__init__.py,sha256=JnPilognG2rT2gtpjD4OwKFX0O3ZqvV-ic8gJxRu7Xo,11672
|
|
23
25
|
vtlengine/DataTypes/TimeHandling.py,sha256=CYnC0sb1qbRjTnCSsA3wgez7QftOzrXHxbuZXlY3O3Q,20151
|
|
24
26
|
vtlengine/DataTypes/__init__.py,sha256=LYXrde68bYm7MLeMLmr4haeOTSE4Fnpq9G2Ewy7DiaU,23084
|
|
25
27
|
vtlengine/Exceptions/__init__.py,sha256=rSSskV_qCBFzg_W67Q1QBAL7Lnq88D7yi2BDYo1hytw,4727
|
|
26
|
-
vtlengine/Exceptions/messages.py,sha256=
|
|
27
|
-
vtlengine/Interpreter/__init__.py,sha256=
|
|
28
|
-
vtlengine/Model/__init__.py,sha256=
|
|
29
|
-
vtlengine/Operators/Aggregation.py,sha256=
|
|
30
|
-
vtlengine/Operators/Analytic.py,sha256=
|
|
28
|
+
vtlengine/Exceptions/messages.py,sha256=9Tzkm-Q4ZI7UFFmWfsiy2xI7hFKMrnPB-EmUfVgxuBo,19428
|
|
29
|
+
vtlengine/Interpreter/__init__.py,sha256=yFXLi3Mr7EnOmdynf-BvFwDHOBsWVjRXSkNgdmhfJVc,83533
|
|
30
|
+
vtlengine/Model/__init__.py,sha256=xWrwhdUOj8Y-5x38zP5XnmFPw8IkBVBBG2bPsUBGLA8,15869
|
|
31
|
+
vtlengine/Operators/Aggregation.py,sha256=43bqjaMqGG9zzFkcs6JLfShb1ISupmyQnXOQQ-HQo9E,11906
|
|
32
|
+
vtlengine/Operators/Analytic.py,sha256=GiVNwa02JNRaVcHEkqKlat9WSIgQ32OhpgOdYc9PlJo,12818
|
|
31
33
|
vtlengine/Operators/Assignment.py,sha256=xyJgGPoFYbq6mzX06gz7Q7L8jXJxpUkgzdY3Lrne2hw,793
|
|
32
34
|
vtlengine/Operators/Boolean.py,sha256=3U5lHkxW5d7QQdGDNxXeXqejlPfFrXKG8_TqknrC8Ls,2856
|
|
33
35
|
vtlengine/Operators/CastOperator.py,sha256=mvWfNhJ1pEEk_ZQp-3unLoYJvJShUjUu_BOYQ6ByySI,16951
|
|
34
36
|
vtlengine/Operators/Clause.py,sha256=_Sdt3qQUpphNRs4IQW5pSj9kagzwLluV9BRHMGNxqsI,15022
|
|
35
37
|
vtlengine/Operators/Comparison.py,sha256=7G2UK1BDCDJR4jTXa-txJlAJEvzXEeYaDSA_2oxjgKY,17286
|
|
36
|
-
vtlengine/Operators/Conditional.py,sha256=
|
|
38
|
+
vtlengine/Operators/Conditional.py,sha256=N-HRmSJ_m_mdHgNPk2JV5uqHPMdz2pXSoJE036mRqtg,19882
|
|
37
39
|
vtlengine/Operators/General.py,sha256=q1fpqP4IYEwURXi8Eo-_j5AUktK0dvNosL9SgSe7a8w,6711
|
|
38
40
|
vtlengine/Operators/HROperators.py,sha256=VVp5FcdbDXhU_VCfUA6t75bs51qx9fKJT4n15WM2vyM,8866
|
|
39
41
|
vtlengine/Operators/Join.py,sha256=df2XG2tKmha_WUhHEYhgZIVc_2L8Wr45o0ISm-HOReA,18108
|
|
@@ -41,19 +43,20 @@ vtlengine/Operators/Numeric.py,sha256=icYTWzEsw6VQFLYc5Wucgr8961d8ZwTFx_wfZ8Wp9C
|
|
|
41
43
|
vtlengine/Operators/RoleSetter.py,sha256=mHZIdcHC3wflj81ekLbioDG1f8yHZXYDQFymV-KnyXA,2274
|
|
42
44
|
vtlengine/Operators/Set.py,sha256=f1uLeY4XZF0cWEwpXRB_CczgbXr6s33DYPuFt39HlEg,7084
|
|
43
45
|
vtlengine/Operators/String.py,sha256=ghWtYl6oUEAAzynY1a9Hg4yqRA9Sa7uk2B6iF9uuSqQ,20230
|
|
44
|
-
vtlengine/Operators/Time.py,sha256=
|
|
46
|
+
vtlengine/Operators/Time.py,sha256=4oVoRnGiOCQjBU9oBTe8LozTNr6A1xNigQJui7J4EL0,42680
|
|
45
47
|
vtlengine/Operators/Validation.py,sha256=ev3HyU7e1XbeAtUQ1y6zY3fzBwMqetDPhG3NNveAGOE,9988
|
|
46
48
|
vtlengine/Operators/__init__.py,sha256=GN5eaAwmzfYKD7JJRIaRqdIJzflGc3UMvrOC9mlYNVo,37227
|
|
47
|
-
vtlengine/Utils/__init__.py,sha256=
|
|
49
|
+
vtlengine/Utils/__init__.py,sha256=zhGPJA8MjHmtEEwMS4CxEFYL0tk2L5F0YPn7bitdRzM,8954
|
|
48
50
|
vtlengine/__extras_check.py,sha256=Wr-lxGZhXJZEacVV5cUkvKt7XM-mry0kYAe3VxNrVcY,614
|
|
49
|
-
vtlengine/__init__.py,sha256=
|
|
51
|
+
vtlengine/__init__.py,sha256=St9OIn1YjTLffx957I4gjwWlf8W5b4xj0oGCad6Stk8,188
|
|
50
52
|
vtlengine/files/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
53
|
vtlengine/files/output/__init__.py,sha256=4tmf-p1Y1u5Ohrwt3clQA-FMGaijKI3HC_iwn3H9J8c,1250
|
|
52
54
|
vtlengine/files/output/_time_period_representation.py,sha256=D5XCSXyEuX_aBzTvBV3sZxACcgwXz2Uu_YH3loMP8q0,1610
|
|
53
|
-
vtlengine/files/parser/__init__.py,sha256=
|
|
54
|
-
vtlengine/files/parser/_rfc_dialect.py,sha256=
|
|
55
|
+
vtlengine/files/parser/__init__.py,sha256=JamEIWI0pFZxT0sKYE6Fii8H2JQcsFn4Nf3T0OLSm9g,8637
|
|
56
|
+
vtlengine/files/parser/_rfc_dialect.py,sha256=Y8kAYBxH_t9AieN_tYg7QRh5A4DgvabKarx9Ko3QeCQ,462
|
|
55
57
|
vtlengine/files/parser/_time_checking.py,sha256=UAC_Pv-eQJKrhgTguWb--xfqMMs6quyMeiAkGBt_vgI,4725
|
|
56
|
-
vtlengine
|
|
57
|
-
vtlengine-1.
|
|
58
|
-
vtlengine-1.
|
|
59
|
-
vtlengine-1.
|
|
58
|
+
vtlengine/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
|
+
vtlengine-1.1.1.dist-info/LICENSE.md,sha256=2xqHuoHohba7gpcZZKtOICRjzeKsQANXG8WoV9V35KM,33893
|
|
60
|
+
vtlengine-1.1.1.dist-info/METADATA,sha256=Csw4dEroIdEu00IUA5thIjqB_1mkDaXTETM7tvN7GjA,4102
|
|
61
|
+
vtlengine-1.1.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
62
|
+
vtlengine-1.1.1.dist-info/RECORD,,
|