tracdap-runtime 0.6.4__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_exec/context.py +382 -29
- tracdap/rt/_exec/dev_mode.py +123 -94
- tracdap/rt/_exec/engine.py +120 -9
- tracdap/rt/_exec/functions.py +125 -20
- tracdap/rt/_exec/graph.py +38 -13
- tracdap/rt/_exec/graph_builder.py +120 -9
- tracdap/rt/_impl/data.py +115 -49
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +74 -30
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +120 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +12 -10
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
- tracdap/rt/_impl/models.py +8 -0
- tracdap/rt/_impl/static_api.py +16 -0
- tracdap/rt/_impl/storage.py +37 -25
- tracdap/rt/_impl/validation.py +76 -7
- tracdap/rt/_plugins/repo_git.py +1 -1
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/experimental.py +220 -0
- tracdap/rt/api/hook.py +4 -0
- tracdap/rt/api/model_api.py +48 -6
- tracdap/rt/config/__init__.py +2 -2
- tracdap/rt/config/common.py +6 -0
- tracdap/rt/metadata/__init__.py +25 -20
- tracdap/rt/metadata/job.py +54 -0
- tracdap/rt/metadata/model.py +18 -0
- tracdap/rt/metadata/resource.py +24 -0
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/METADATA +3 -1
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/RECORD +33 -29
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/WHEEL +0 -0
- {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,22 @@
|
|
1
1
|
from tracdap.rt._impl.grpc.tracdap.metadata import type_pb2 as _type_pb2
|
2
2
|
from tracdap.rt._impl.grpc.tracdap.metadata import data_pb2 as _data_pb2
|
3
3
|
from google.protobuf.internal import containers as _containers
|
4
|
+
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
|
4
5
|
from google.protobuf import descriptor as _descriptor
|
5
6
|
from google.protobuf import message as _message
|
6
7
|
from typing import ClassVar as _ClassVar, Mapping as _Mapping, Optional as _Optional, Union as _Union
|
7
8
|
|
8
9
|
DESCRIPTOR: _descriptor.FileDescriptor
|
9
10
|
|
11
|
+
class ModelType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
12
|
+
__slots__ = ()
|
13
|
+
STANDARD_MODEL: _ClassVar[ModelType]
|
14
|
+
DATA_IMPORT_MODEL: _ClassVar[ModelType]
|
15
|
+
DATA_EXPORT_MODEL: _ClassVar[ModelType]
|
16
|
+
STANDARD_MODEL: ModelType
|
17
|
+
DATA_IMPORT_MODEL: ModelType
|
18
|
+
DATA_EXPORT_MODEL: ModelType
|
19
|
+
|
10
20
|
class ModelParameter(_message.Message):
|
11
21
|
__slots__ = ("paramType", "label", "defaultValue", "paramProps")
|
12
22
|
class ParamPropsEntry(_message.Message):
|
@@ -69,7 +79,7 @@ class ModelOutputSchema(_message.Message):
|
|
69
79
|
def __init__(self, schema: _Optional[_Union[_data_pb2.SchemaDefinition, _Mapping]] = ..., label: _Optional[str] = ..., optional: bool = ..., dynamic: bool = ..., outputProps: _Optional[_Mapping[str, _type_pb2.Value]] = ...) -> None: ...
|
70
80
|
|
71
81
|
class ModelDefinition(_message.Message):
|
72
|
-
__slots__ = ("language", "repository", "packageGroup", "package", "version", "entryPoint", "path", "parameters", "inputs", "outputs", "staticAttributes")
|
82
|
+
__slots__ = ("language", "repository", "packageGroup", "package", "version", "entryPoint", "path", "parameters", "inputs", "outputs", "staticAttributes", "modelType")
|
73
83
|
class ParametersEntry(_message.Message):
|
74
84
|
__slots__ = ("key", "value")
|
75
85
|
KEY_FIELD_NUMBER: _ClassVar[int]
|
@@ -109,6 +119,7 @@ class ModelDefinition(_message.Message):
|
|
109
119
|
INPUTS_FIELD_NUMBER: _ClassVar[int]
|
110
120
|
OUTPUTS_FIELD_NUMBER: _ClassVar[int]
|
111
121
|
STATICATTRIBUTES_FIELD_NUMBER: _ClassVar[int]
|
122
|
+
MODELTYPE_FIELD_NUMBER: _ClassVar[int]
|
112
123
|
language: str
|
113
124
|
repository: str
|
114
125
|
packageGroup: str
|
@@ -120,4 +131,5 @@ class ModelDefinition(_message.Message):
|
|
120
131
|
inputs: _containers.MessageMap[str, ModelInputSchema]
|
121
132
|
outputs: _containers.MessageMap[str, ModelOutputSchema]
|
122
133
|
staticAttributes: _containers.MessageMap[str, _type_pb2.Value]
|
123
|
-
|
134
|
+
modelType: ModelType
|
135
|
+
def __init__(self, language: _Optional[str] = ..., repository: _Optional[str] = ..., packageGroup: _Optional[str] = ..., package: _Optional[str] = ..., version: _Optional[str] = ..., entryPoint: _Optional[str] = ..., path: _Optional[str] = ..., parameters: _Optional[_Mapping[str, ModelParameter]] = ..., inputs: _Optional[_Mapping[str, ModelInputSchema]] = ..., outputs: _Optional[_Mapping[str, ModelOutputSchema]] = ..., staticAttributes: _Optional[_Mapping[str, _type_pb2.Value]] = ..., modelType: _Optional[_Union[ModelType, str]] = ...) -> None: ...
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
3
|
+
# source: tracdap/rt/_impl/grpc/tracdap/metadata/resource.proto
|
4
|
+
# Protobuf Python Version: 4.25.3
|
5
|
+
"""Generated protocol buffer code."""
|
6
|
+
from google.protobuf import descriptor as _descriptor
|
7
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
8
|
+
from google.protobuf import symbol_database as _symbol_database
|
9
|
+
from google.protobuf.internal import builder as _builder
|
10
|
+
# @@protoc_insertion_point(imports)
|
11
|
+
|
12
|
+
_sym_db = _symbol_database.Default()
|
13
|
+
|
14
|
+
|
15
|
+
from tracdap.rt._impl.grpc.tracdap.metadata import object_id_pb2 as tracdap_dot_rt_dot___impl_dot_grpc_dot_tracdap_dot_metadata_dot_object__id__pb2
|
16
|
+
from tracdap.rt._impl.grpc.tracdap.metadata import object_pb2 as tracdap_dot_rt_dot___impl_dot_grpc_dot_tracdap_dot_metadata_dot_object__pb2
|
17
|
+
|
18
|
+
|
19
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n5tracdap/rt/_impl/grpc/tracdap/metadata/resource.proto\x12\x10tracdap.metadata\x1a\x36tracdap/rt/_impl/grpc/tracdap/metadata/object_id.proto\x1a\x33tracdap/rt/_impl/grpc/tracdap/metadata/object.proto*m\n\x0cResourceType\x12\x19\n\x15RESOURCE_TYPE_NOT_SET\x10\x00\x12\x14\n\x10MODEL_REPOSITORY\x10\x01\x12\x14\n\x10INTERNAL_STORAGE\x10\x02\"\x04\x08\x03\x10\x03*\x10\x45XTERNAL_STORAGEB\x1e\n\x1aorg.finos.tracdap.metadataP\x01\x62\x06proto3')
|
20
|
+
|
21
|
+
_globals = globals()
|
22
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
23
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'tracdap.rt._impl.grpc.tracdap.metadata.resource_pb2', _globals)
|
24
|
+
if _descriptor._USE_C_DESCRIPTORS == False:
|
25
|
+
_globals['DESCRIPTOR']._options = None
|
26
|
+
_globals['DESCRIPTOR']._serialized_options = b'\n\032org.finos.tracdap.metadataP\001'
|
27
|
+
_globals['_RESOURCETYPE']._serialized_start=184
|
28
|
+
_globals['_RESOURCETYPE']._serialized_end=293
|
29
|
+
# @@protoc_insertion_point(module_scope)
|
@@ -0,0 +1,16 @@
|
|
1
|
+
from tracdap.rt._impl.grpc.tracdap.metadata import object_id_pb2 as _object_id_pb2
|
2
|
+
from tracdap.rt._impl.grpc.tracdap.metadata import object_pb2 as _object_pb2
|
3
|
+
from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
|
4
|
+
from google.protobuf import descriptor as _descriptor
|
5
|
+
from typing import ClassVar as _ClassVar
|
6
|
+
|
7
|
+
DESCRIPTOR: _descriptor.FileDescriptor
|
8
|
+
|
9
|
+
class ResourceType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
10
|
+
__slots__ = ()
|
11
|
+
RESOURCE_TYPE_NOT_SET: _ClassVar[ResourceType]
|
12
|
+
MODEL_REPOSITORY: _ClassVar[ResourceType]
|
13
|
+
INTERNAL_STORAGE: _ClassVar[ResourceType]
|
14
|
+
RESOURCE_TYPE_NOT_SET: ResourceType
|
15
|
+
MODEL_REPOSITORY: ResourceType
|
16
|
+
INTERNAL_STORAGE: ResourceType
|
tracdap/rt/_impl/models.py
CHANGED
@@ -19,6 +19,7 @@ import pathlib
|
|
19
19
|
import copy
|
20
20
|
|
21
21
|
import tracdap.rt.api as _api
|
22
|
+
import tracdap.rt.api.experimental as _eapi
|
22
23
|
import tracdap.rt.metadata as _meta
|
23
24
|
import tracdap.rt.config as _cfg
|
24
25
|
import tracdap.rt.exceptions as _ex
|
@@ -208,6 +209,13 @@ class ModelLoader:
|
|
208
209
|
model_def.inputs = inputs
|
209
210
|
model_def.outputs = outputs
|
210
211
|
|
212
|
+
if isinstance(model, _eapi.TracDataImport):
|
213
|
+
model_def.modelType = _meta.ModelType.DATA_IMPORT_MODEL
|
214
|
+
elif isinstance(model, _eapi.TracDataExport):
|
215
|
+
model_def.modelType = _meta.ModelType.DATA_EXPORT_MODEL
|
216
|
+
else:
|
217
|
+
model_def.modelType = _meta.ModelType.STANDARD_MODEL
|
218
|
+
|
211
219
|
_val.quick_validate_model_def(model_def)
|
212
220
|
|
213
221
|
for attr_name, attr_value in attributes.items():
|
tracdap/rt/_impl/static_api.py
CHANGED
@@ -17,6 +17,7 @@ import types as _ts
|
|
17
17
|
|
18
18
|
import tracdap.rt.metadata as _meta
|
19
19
|
import tracdap.rt.exceptions as _ex
|
20
|
+
import tracdap.rt._impl.data as _data
|
20
21
|
import tracdap.rt._impl.schemas as _schemas
|
21
22
|
import tracdap.rt._impl.type_system as _type_system
|
22
23
|
import tracdap.rt._impl.validation as _val
|
@@ -152,6 +153,21 @@ class StaticApiImpl(_StaticApiHook):
|
|
152
153
|
|
153
154
|
return _schemas.SchemaLoader.load_schema(package, schema_file)
|
154
155
|
|
156
|
+
def infer_schema(self, dataset: _tp.Any) -> _meta.SchemaDefinition:
|
157
|
+
|
158
|
+
if _data.pandas and isinstance(dataset, _data.pandas.DataFrame):
|
159
|
+
arrow_schema = _data.DataMapping.pandas_to_arrow_schema(dataset)
|
160
|
+
|
161
|
+
elif _data.polars and isinstance(dataset, _data.polars.DataFrame):
|
162
|
+
arrow_schema = _data.DataMapping.polars_to_arrow_schema(dataset)
|
163
|
+
|
164
|
+
else:
|
165
|
+
dataset_type = f"{type(dataset).__module__}.{type(dataset).__name__}"
|
166
|
+
message = f"Schema inference is not available for dataset type [{dataset_type}]"
|
167
|
+
raise _ex.ERuntimeValidation(message)
|
168
|
+
|
169
|
+
return _data.DataMapping.arrow_to_trac_schema(arrow_schema)
|
170
|
+
|
155
171
|
def define_input_table(
|
156
172
|
self, *fields: _tp.Union[_meta.FieldSchema, _tp.List[_meta.FieldSchema]],
|
157
173
|
label: _tp.Optional[str] = None, optional: bool = False, dynamic: bool = False,
|
tracdap/rt/_impl/storage.py
CHANGED
@@ -30,6 +30,7 @@ import tracdap.rt.exceptions as _ex
|
|
30
30
|
import tracdap.rt.ext.plugins as plugins
|
31
31
|
import tracdap.rt._impl.data as _data
|
32
32
|
import tracdap.rt._impl.util as _util
|
33
|
+
import tracdap.rt._impl.validation as _val
|
33
34
|
|
34
35
|
# Import storage interfaces
|
35
36
|
from tracdap.rt.ext.storage import *
|
@@ -41,7 +42,11 @@ class FormatManager:
|
|
41
42
|
def get_data_format(cls, format_code: str, format_options: tp.Dict[str, tp.Any]) -> IDataFormat:
|
42
43
|
|
43
44
|
try:
|
44
|
-
|
45
|
+
|
46
|
+
config = _cfg.PluginConfig(
|
47
|
+
protocol=format_code,
|
48
|
+
properties=format_options)
|
49
|
+
|
45
50
|
return plugins.PluginManager.load_plugin(IDataFormat, config)
|
46
51
|
|
47
52
|
except _ex.EPluginNotAvailable as e:
|
@@ -73,11 +78,18 @@ class StorageManager:
|
|
73
78
|
self.__log = _util.logger_for_object(self)
|
74
79
|
self.__file_storage: tp.Dict[str, IFileStorage] = dict()
|
75
80
|
self.__data_storage: tp.Dict[str, IDataStorage] = dict()
|
81
|
+
self.__external: tp.List[str] = list()
|
76
82
|
self.__settings = sys_config.storage
|
77
83
|
|
78
84
|
for storage_key, storage_config in sys_config.storage.buckets.items():
|
79
85
|
self.create_storage(storage_key, storage_config)
|
80
86
|
|
87
|
+
for storage_key, storage_config in sys_config.storage.external.items():
|
88
|
+
if storage_key in self.__file_storage or storage_key in self.__data_storage:
|
89
|
+
raise _ex.EConfig(f"Storage key [{storage_key}] is defined as both internal and external storage")
|
90
|
+
self.__external.append(storage_key)
|
91
|
+
self.create_storage(storage_key, storage_config)
|
92
|
+
|
81
93
|
def default_storage_key(self):
|
82
94
|
return self.__settings.defaultBucket
|
83
95
|
|
@@ -147,26 +159,32 @@ class StorageManager:
|
|
147
159
|
self.__file_storage[storage_key] = file_storage
|
148
160
|
self.__data_storage[storage_key] = data_storage
|
149
161
|
|
150
|
-
def has_file_storage(self, storage_key: str) -> bool:
|
162
|
+
def has_file_storage(self, storage_key: str, external: bool = False) -> bool:
|
163
|
+
|
164
|
+
if external ^ (storage_key in self.__external):
|
165
|
+
return False
|
151
166
|
|
152
167
|
return storage_key in self.__file_storage
|
153
168
|
|
154
|
-
def get_file_storage(self, storage_key: str) -> IFileStorage:
|
169
|
+
def get_file_storage(self, storage_key: str, external: bool = False) -> IFileStorage:
|
155
170
|
|
156
|
-
if not self.has_file_storage(storage_key):
|
171
|
+
if not self.has_file_storage(storage_key, external):
|
157
172
|
err = f"File storage is not configured for storage key [{storage_key}]"
|
158
173
|
self.__log.error(err)
|
159
174
|
raise _ex.EStorageConfig(err)
|
160
175
|
|
161
176
|
return self.__file_storage[storage_key]
|
162
177
|
|
163
|
-
def has_data_storage(self, storage_key: str) -> bool:
|
178
|
+
def has_data_storage(self, storage_key: str, external: bool = False) -> bool:
|
179
|
+
|
180
|
+
if external ^ (storage_key in self.__external):
|
181
|
+
return False
|
164
182
|
|
165
183
|
return storage_key in self.__data_storage
|
166
184
|
|
167
|
-
def get_data_storage(self, storage_key: str) -> IDataStorage:
|
185
|
+
def get_data_storage(self, storage_key: str, external: bool = False) -> IDataStorage:
|
168
186
|
|
169
|
-
if not self.has_data_storage(storage_key):
|
187
|
+
if not self.has_data_storage(storage_key, external):
|
170
188
|
err = f"Data storage is not configured for storage key [{storage_key}]"
|
171
189
|
self.__log.error(err)
|
172
190
|
raise _ex.EStorageConfig(err)
|
@@ -587,29 +605,27 @@ class CommonFileStorage(IFileStorage):
|
|
587
605
|
|
588
606
|
try:
|
589
607
|
|
590
|
-
if
|
608
|
+
if _val.StorageValidator.storage_path_is_empty(storage_path):
|
591
609
|
raise self._explicit_error(self.ExplicitError.STORAGE_PATH_NULL_OR_BLANK, operation_name, storage_path)
|
592
610
|
|
593
|
-
if
|
611
|
+
if _val.StorageValidator.storage_path_invalid(storage_path):
|
594
612
|
raise self._explicit_error(self.ExplicitError.STORAGE_PATH_INVALID, operation_name, storage_path)
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
if relative_path.is_absolute():
|
613
|
+
|
614
|
+
if _val.StorageValidator.storage_path_not_relative(storage_path):
|
599
615
|
raise self._explicit_error(self.ExplicitError.STORAGE_PATH_NOT_RELATIVE, operation_name, storage_path)
|
600
616
|
|
617
|
+
if _val.StorageValidator.storage_path_outside_root(storage_path):
|
618
|
+
raise self._explicit_error(self.ExplicitError.STORAGE_PATH_OUTSIDE_ROOT, operation_name, storage_path)
|
619
|
+
|
620
|
+
if not allow_root_dir and _val.StorageValidator.storage_path_is_root(storage_path):
|
621
|
+
raise self._explicit_error(self.ExplicitError.STORAGE_PATH_IS_ROOT, operation_name, storage_path)
|
622
|
+
|
601
623
|
root_path = pathlib.Path("C:\\root") if _util.is_windows() else pathlib.Path("/root")
|
624
|
+
relative_path = pathlib.Path(storage_path)
|
602
625
|
absolute_path = root_path.joinpath(relative_path).resolve(False)
|
603
626
|
|
604
627
|
if absolute_path == root_path:
|
605
|
-
|
606
|
-
raise self._explicit_error(self.ExplicitError.STORAGE_PATH_IS_ROOT, operation_name, storage_path)
|
607
|
-
else:
|
608
|
-
return ""
|
609
|
-
|
610
|
-
# is_relative_to only supported in Python 3.9+, we need to support 3.7
|
611
|
-
if root_path not in absolute_path.parents:
|
612
|
-
raise self._explicit_error(self.ExplicitError.STORAGE_PATH_OUTSIDE_ROOT, operation_name, storage_path)
|
628
|
+
return ""
|
613
629
|
else:
|
614
630
|
return absolute_path.relative_to(root_path).as_posix()
|
615
631
|
|
@@ -639,10 +655,6 @@ class CommonFileStorage(IFileStorage):
|
|
639
655
|
|
640
656
|
return err
|
641
657
|
|
642
|
-
_ILLEGAL_PATH_CHARS_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
|
643
|
-
_ILLEGAL_PATH_CHARS_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")
|
644
|
-
_ILLEGAL_PATH_CHARS = _ILLEGAL_PATH_CHARS_WINDOWS if _util.is_windows() else _ILLEGAL_PATH_CHARS_POSIX
|
645
|
-
|
646
658
|
class ExplicitError(enum.Enum):
|
647
659
|
|
648
660
|
# Validation failures
|
tracdap/rt/_impl/validation.py
CHANGED
@@ -15,7 +15,9 @@
|
|
15
15
|
import inspect
|
16
16
|
import logging
|
17
17
|
import re
|
18
|
+
import types
|
18
19
|
import typing as tp
|
20
|
+
import pathlib
|
19
21
|
|
20
22
|
import tracdap.rt.metadata as meta
|
21
23
|
import tracdap.rt.exceptions as ex
|
@@ -25,6 +27,11 @@ import tracdap.rt._impl.util as util
|
|
25
27
|
from tracdap.rt.api.hook import _Named # noqa
|
26
28
|
|
27
29
|
|
30
|
+
def require_package(module_name: str, module_obj: types.ModuleType):
|
31
|
+
if module_obj is None:
|
32
|
+
raise ex.ERuntimeValidation(f"Optional package [{module_name}] is not installed")
|
33
|
+
|
34
|
+
|
28
35
|
def validate_signature(method: tp.Callable, *args, **kwargs):
|
29
36
|
_TypeValidator.validate_signature(method, *args, **kwargs)
|
30
37
|
|
@@ -62,15 +69,19 @@ class _TypeValidator:
|
|
62
69
|
signature = inspect.signature(method)
|
63
70
|
cls.__method_cache[method.__name__] = signature
|
64
71
|
|
72
|
+
hints = tp.get_type_hints(method)
|
73
|
+
|
65
74
|
positional_index = 0
|
66
75
|
|
67
76
|
for param_name, param in signature.parameters.items():
|
68
77
|
|
78
|
+
param_type = hints.get(param_name)
|
79
|
+
|
69
80
|
values = cls._select_arg(method.__name__, param, positional_index, *args, **kwargs)
|
70
81
|
positional_index += len(values)
|
71
82
|
|
72
83
|
for value in values:
|
73
|
-
cls._validate_arg(method.__name__,
|
84
|
+
cls._validate_arg(method.__name__, param_name, param_type, value)
|
74
85
|
|
75
86
|
@classmethod
|
76
87
|
def validate_return_type(cls, method: tp.Callable, value: tp.Any):
|
@@ -146,14 +157,18 @@ class _TypeValidator:
|
|
146
157
|
raise ex.EUnexpected("Invalid method signature in runtime API (this is a bug)")
|
147
158
|
|
148
159
|
@classmethod
|
149
|
-
def _validate_arg(cls, method_name: str,
|
160
|
+
def _validate_arg(cls, method_name: str, param_name: str, param_type: tp.Type, value: tp.Any):
|
150
161
|
|
151
|
-
if not cls._validate_type(
|
162
|
+
if not cls._validate_type(param_type, value):
|
152
163
|
|
153
|
-
expected_type = cls._type_name(
|
164
|
+
expected_type = cls._type_name(param_type)
|
154
165
|
actual_type = cls._type_name(type(value)) if value is not None else str(None)
|
155
166
|
|
156
|
-
|
167
|
+
if expected_type == actual_type:
|
168
|
+
expected_type = cls._type_name(param_type, qualified=True)
|
169
|
+
actual_type = cls._type_name(type(value), qualified=True)
|
170
|
+
|
171
|
+
err = f"Invalid API call [{method_name}()]: Wrong type for [{param_name}]" \
|
157
172
|
+ f" (expected [{expected_type}], got [{actual_type}])"
|
158
173
|
|
159
174
|
cls._log.error(err)
|
@@ -210,7 +225,7 @@ class _TypeValidator:
|
|
210
225
|
return isinstance(value, expected_type)
|
211
226
|
|
212
227
|
@classmethod
|
213
|
-
def _type_name(cls, type_var: tp.Type) -> str:
|
228
|
+
def _type_name(cls, type_var: tp.Type, qualified: bool = False) -> str:
|
214
229
|
|
215
230
|
if isinstance(type_var, cls.__generic_metaclass):
|
216
231
|
|
@@ -230,7 +245,10 @@ class _TypeValidator:
|
|
230
245
|
|
231
246
|
raise ex.ETracInternal(f"Validation of [{origin.__name__}] generic parameters is not supported yet")
|
232
247
|
|
233
|
-
|
248
|
+
if qualified:
|
249
|
+
return f"{type_var.__module__}.{type_var.__name__}"
|
250
|
+
else:
|
251
|
+
return type_var.__name__
|
234
252
|
|
235
253
|
|
236
254
|
class StaticValidator:
|
@@ -458,3 +476,54 @@ class StaticValidator:
|
|
458
476
|
def _fail(cls, message: str):
|
459
477
|
cls._log.error(message)
|
460
478
|
raise ex.EModelValidation(message)
|
479
|
+
|
480
|
+
|
481
|
+
class StorageValidator:
|
482
|
+
|
483
|
+
__ILLEGAL_PATH_CHARS_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
|
484
|
+
__ILLEGAL_PATH_CHARS_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")
|
485
|
+
__ILLEGAL_PATH_CHARS = __ILLEGAL_PATH_CHARS_WINDOWS if util.is_windows() else __ILLEGAL_PATH_CHARS_POSIX
|
486
|
+
|
487
|
+
@classmethod
|
488
|
+
def storage_path_is_empty(cls, storage_path: str):
|
489
|
+
|
490
|
+
return storage_path is None or len(storage_path.strip()) == 0
|
491
|
+
|
492
|
+
@classmethod
|
493
|
+
def storage_path_invalid(cls, storage_path: str):
|
494
|
+
|
495
|
+
if cls.__ILLEGAL_PATH_CHARS.match(storage_path):
|
496
|
+
return True
|
497
|
+
|
498
|
+
try:
|
499
|
+
# Make sure the path can be interpreted as a path
|
500
|
+
pathlib.Path(storage_path)
|
501
|
+
return False
|
502
|
+
except ValueError:
|
503
|
+
return True
|
504
|
+
|
505
|
+
@classmethod
|
506
|
+
def storage_path_not_relative(cls, storage_path: str):
|
507
|
+
|
508
|
+
relative_path = pathlib.Path(storage_path)
|
509
|
+
return relative_path.is_absolute()
|
510
|
+
|
511
|
+
@classmethod
|
512
|
+
def storage_path_outside_root(cls, storage_path: str):
|
513
|
+
|
514
|
+
# is_relative_to only supported in Python 3.9+, we need to support 3.8
|
515
|
+
|
516
|
+
root_path = pathlib.Path("C:\\root") if util.is_windows() else pathlib.Path("/root")
|
517
|
+
relative_path = pathlib.Path(storage_path)
|
518
|
+
absolute_path = root_path.joinpath(relative_path).resolve(False)
|
519
|
+
|
520
|
+
return root_path != absolute_path and root_path not in absolute_path.parents
|
521
|
+
|
522
|
+
@classmethod
|
523
|
+
def storage_path_is_root(cls, storage_path: str):
|
524
|
+
|
525
|
+
root_path = pathlib.Path("C:\\root") if util.is_windows() else pathlib.Path("/root")
|
526
|
+
relative_path = pathlib.Path(storage_path)
|
527
|
+
absolute_path = root_path.joinpath(relative_path).resolve(False)
|
528
|
+
|
529
|
+
return root_path == absolute_path
|
tracdap/rt/_plugins/repo_git.py
CHANGED
tracdap/rt/_version.py
CHANGED
@@ -0,0 +1,220 @@
|
|
1
|
+
# Copyright 2024 Accenture Global Solutions Limited
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import abc as _abc
|
16
|
+
import dataclasses as _dc
|
17
|
+
import datetime as _dt
|
18
|
+
import enum as _enum
|
19
|
+
import typing as _tp
|
20
|
+
|
21
|
+
from tracdap.rt.api import *
|
22
|
+
from .hook import _StaticApiHook
|
23
|
+
|
24
|
+
|
25
|
+
_DATA_FRAMEWORK = _tp.TypeVar('_DATA_FRAMEWORK')
|
26
|
+
|
27
|
+
|
28
|
+
class _DataFramework(_tp.Generic[_DATA_FRAMEWORK]):
|
29
|
+
|
30
|
+
PANDAS: "_DataFramework"
|
31
|
+
POLARS: "_DataFramework"
|
32
|
+
|
33
|
+
def __init__(self, framework_name, framework_type: _DATA_FRAMEWORK):
|
34
|
+
self.__framework_name = framework_name
|
35
|
+
self.__framework_type = framework_type
|
36
|
+
|
37
|
+
def __str__(self):
|
38
|
+
return self.__framework_name
|
39
|
+
|
40
|
+
|
41
|
+
if _tp.TYPE_CHECKING:
|
42
|
+
|
43
|
+
if pandas:
|
44
|
+
_DataFramework.PANDAS = _DataFramework('pandas', pandas.DataFrame)
|
45
|
+
"""The original Python dataframe library, most widely used"""
|
46
|
+
else:
|
47
|
+
_DataFramework.PANDAS = _DataFramework('pandas', None)
|
48
|
+
"""Pandas data framework is not installed"""
|
49
|
+
|
50
|
+
if polars:
|
51
|
+
_DataFramework.POLARS = _DataFramework('polars', polars.DataFrame)
|
52
|
+
"""A modern, fast and simple alternative to Pandas"""
|
53
|
+
else:
|
54
|
+
_DataFramework.POLARS = _DataFramework('polars', None)
|
55
|
+
"""Polars data framework is not installed"""
|
56
|
+
|
57
|
+
else:
|
58
|
+
|
59
|
+
_DataFramework.PANDAS = _DataFramework('pandas', None)
|
60
|
+
_DataFramework.POLARS = _DataFramework('polars', None)
|
61
|
+
|
62
|
+
PANDAS = _DataFramework.PANDAS
|
63
|
+
POLARS = _DataFramework.POLARS
|
64
|
+
|
65
|
+
|
66
|
+
class TracContext(TracContext):
|
67
|
+
|
68
|
+
@_abc.abstractmethod
|
69
|
+
def get_table(self, dataset_name: str, framework: _DataFramework[_DATA_FRAMEWORK]) -> _DATA_FRAMEWORK:
|
70
|
+
|
71
|
+
pass
|
72
|
+
|
73
|
+
@_abc.abstractmethod
|
74
|
+
def put_table(self, dataset_name: str, dataset: _DATA_FRAMEWORK):
|
75
|
+
|
76
|
+
pass
|
77
|
+
|
78
|
+
|
79
|
+
def init_static():
|
80
|
+
import tracdap.rt._impl.static_api as _static_impl # noqa
|
81
|
+
_static_impl.StaticApiImpl.register_impl()
|
82
|
+
|
83
|
+
|
84
|
+
def infer_schema(dataset: _tp.Any) -> SchemaDefinition:
|
85
|
+
sa = _StaticApiHook.get_instance()
|
86
|
+
return sa.infer_schema(dataset)
|
87
|
+
|
88
|
+
|
89
|
+
class FileType(_enum.Enum):
|
90
|
+
|
91
|
+
FILE = 1
|
92
|
+
DIRECTORY = 2
|
93
|
+
|
94
|
+
|
95
|
+
@_dc.dataclass
|
96
|
+
class FileStat:
|
97
|
+
|
98
|
+
"""
|
99
|
+
Dataclass to represent some basic file stat info independent of the storage technology used
|
100
|
+
I.e. it does not depend on the Python stat_result class, which refers to locally-mounted filesystems
|
101
|
+
Timestamps are held in UTC
|
102
|
+
"""
|
103
|
+
|
104
|
+
file_name: str
|
105
|
+
file_type: FileType
|
106
|
+
storage_path: str
|
107
|
+
size: int
|
108
|
+
|
109
|
+
mtime: _tp.Optional[_dt.datetime] = None
|
110
|
+
atime: _tp.Optional[_dt.datetime] = None
|
111
|
+
|
112
|
+
|
113
|
+
class TracFileStorage:
|
114
|
+
|
115
|
+
@_abc.abstractmethod
|
116
|
+
def get_storage_key(self) -> str:
|
117
|
+
pass
|
118
|
+
|
119
|
+
@_abc.abstractmethod
|
120
|
+
def exists(self, storage_path: str) -> bool:
|
121
|
+
"""The exists method can be used for both files and directories"""
|
122
|
+
pass
|
123
|
+
|
124
|
+
@_abc.abstractmethod
|
125
|
+
def size(self, storage_path: str) -> int:
|
126
|
+
"""The size method only works on regular files, it cannot be used for directories"""
|
127
|
+
pass
|
128
|
+
|
129
|
+
@_abc.abstractmethod
|
130
|
+
def stat(self, storage_path: str) -> FileStat:
|
131
|
+
"""The stat method can be used for both files and directories, so long as they exist"""
|
132
|
+
pass
|
133
|
+
|
134
|
+
@_abc.abstractmethod
|
135
|
+
def ls(self, storage_path: str, recursive: bool = False) -> _tp.List[FileStat]:
|
136
|
+
"""The ls method only works on directories, it cannot be used for regular files"""
|
137
|
+
pass
|
138
|
+
|
139
|
+
@_abc.abstractmethod
|
140
|
+
def mkdir(self, storage_path: str, recursive: bool = False):
|
141
|
+
"""The mkdir method will succeed silently if the directory already exists"""
|
142
|
+
pass
|
143
|
+
|
144
|
+
@_abc.abstractmethod
|
145
|
+
def rm(self, storage_path: str):
|
146
|
+
"""The rm method only works on regular files, it cannot be used for directories and is not recursive"""
|
147
|
+
pass
|
148
|
+
|
149
|
+
@_abc.abstractmethod
|
150
|
+
def rmdir(self, storage_path: str):
|
151
|
+
"""The rmdir method only works on directories and is always recursive"""
|
152
|
+
pass
|
153
|
+
|
154
|
+
@_abc.abstractmethod
|
155
|
+
def read_byte_stream(self, storage_path: str) -> _tp.ContextManager[_tp.BinaryIO]:
|
156
|
+
"""The read_byte_stream method only works for existing files"""
|
157
|
+
pass
|
158
|
+
|
159
|
+
@_abc.abstractmethod
|
160
|
+
def write_byte_stream(self, storage_path: str) -> _tp.ContextManager[_tp.BinaryIO]:
|
161
|
+
"""The write_byte_stream method will always overwrite an existing file if it exists"""
|
162
|
+
pass
|
163
|
+
|
164
|
+
def read_bytes(self, storage_path: str) -> bytes:
|
165
|
+
"""The read_bytes method only works for existing files"""
|
166
|
+
with self.read_byte_stream(storage_path) as stream:
|
167
|
+
return stream.read()
|
168
|
+
|
169
|
+
def write_bytes(self, storage_path: str, data: bytes):
|
170
|
+
"""The write_bytes method will always overwrite an existing file if it exists"""
|
171
|
+
with self.write_byte_stream(storage_path) as stream:
|
172
|
+
stream.write(data)
|
173
|
+
|
174
|
+
|
175
|
+
|
176
|
+
class TracDataContext(TracContext):
|
177
|
+
|
178
|
+
@_abc.abstractmethod
|
179
|
+
def get_file_storage(self, storage_key: str) -> TracFileStorage:
|
180
|
+
pass
|
181
|
+
|
182
|
+
@_abc.abstractmethod
|
183
|
+
def get_data_storage(self, storage_key: str) -> None:
|
184
|
+
pass
|
185
|
+
|
186
|
+
@_abc.abstractmethod
|
187
|
+
def add_data_import(self, dataset_key: str):
|
188
|
+
pass
|
189
|
+
|
190
|
+
@_abc.abstractmethod
|
191
|
+
def set_source_metadata(self, dataset_key: str, storage_key: str, source_info: FileStat):
|
192
|
+
pass
|
193
|
+
|
194
|
+
@_abc.abstractmethod
|
195
|
+
def set_attribute(self, dataset_key: str, attribute_name: str, value: _tp.Any):
|
196
|
+
pass
|
197
|
+
|
198
|
+
@_abc.abstractmethod
|
199
|
+
def set_schema(self, dataset_key: str, schema: SchemaDefinition):
|
200
|
+
pass
|
201
|
+
|
202
|
+
|
203
|
+
class TracDataImport(TracModel):
|
204
|
+
|
205
|
+
def define_inputs(self) -> _tp.Dict[str, ModelInputSchema]:
|
206
|
+
return dict()
|
207
|
+
|
208
|
+
@_abc.abstractmethod
|
209
|
+
def run_model(self, ctx: TracDataContext):
|
210
|
+
pass
|
211
|
+
|
212
|
+
|
213
|
+
class TracDataExport(TracModel):
|
214
|
+
|
215
|
+
def define_outputs(self) -> _tp.Dict[str, ModelOutputSchema]:
|
216
|
+
return dict()
|
217
|
+
|
218
|
+
@_abc.abstractmethod
|
219
|
+
def run_model(self, ctx: TracDataContext):
|
220
|
+
pass
|
tracdap/rt/api/hook.py
CHANGED
@@ -118,6 +118,10 @@ class _StaticApiHook:
|
|
118
118
|
|
119
119
|
pass
|
120
120
|
|
121
|
+
@_abc.abstractmethod
|
122
|
+
def infer_schema(self, dataset: _tp.Any) -> _meta.SchemaDefinition:
|
123
|
+
pass
|
124
|
+
|
121
125
|
@_abc.abstractmethod
|
122
126
|
def define_input_table(
|
123
127
|
self, *fields: _tp.Union[_meta.FieldSchema, _tp.List[_meta.FieldSchema]],
|