tracdap-runtime 0.6.3__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracdap/rt/_exec/context.py +572 -112
- tracdap/rt/_exec/dev_mode.py +166 -97
- tracdap/rt/_exec/engine.py +120 -9
- tracdap/rt/_exec/functions.py +137 -35
- tracdap/rt/_exec/graph.py +38 -13
- tracdap/rt/_exec/graph_builder.py +120 -9
- tracdap/rt/_impl/data.py +183 -52
- tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +18 -18
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +74 -30
- tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +120 -2
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +20 -18
- tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +22 -6
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
- tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
- tracdap/rt/_impl/models.py +8 -0
- tracdap/rt/_impl/static_api.py +42 -10
- tracdap/rt/_impl/storage.py +37 -25
- tracdap/rt/_impl/validation.py +113 -11
- tracdap/rt/_plugins/repo_git.py +1 -1
- tracdap/rt/_version.py +1 -1
- tracdap/rt/api/experimental.py +220 -0
- tracdap/rt/api/hook.py +6 -4
- tracdap/rt/api/model_api.py +98 -13
- tracdap/rt/api/static_api.py +14 -6
- tracdap/rt/config/__init__.py +2 -2
- tracdap/rt/config/common.py +23 -17
- tracdap/rt/config/job.py +2 -2
- tracdap/rt/config/platform.py +25 -25
- tracdap/rt/config/result.py +2 -2
- tracdap/rt/config/runtime.py +3 -3
- tracdap/rt/launch/cli.py +7 -4
- tracdap/rt/launch/launch.py +19 -3
- tracdap/rt/metadata/__init__.py +25 -20
- tracdap/rt/metadata/common.py +2 -2
- tracdap/rt/metadata/custom.py +3 -3
- tracdap/rt/metadata/data.py +12 -12
- tracdap/rt/metadata/file.py +6 -6
- tracdap/rt/metadata/flow.py +6 -6
- tracdap/rt/metadata/job.py +62 -8
- tracdap/rt/metadata/model.py +33 -11
- tracdap/rt/metadata/object_id.py +8 -8
- tracdap/rt/metadata/resource.py +24 -0
- tracdap/rt/metadata/search.py +5 -5
- tracdap/rt/metadata/stoarge.py +6 -6
- tracdap/rt/metadata/tag.py +1 -1
- tracdap/rt/metadata/tag_update.py +1 -1
- tracdap/rt/metadata/type.py +4 -4
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/METADATA +3 -1
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/RECORD +52 -48
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/LICENSE +0 -0
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/WHEEL +0 -0
- {tracdap_runtime-0.6.3.dist-info → tracdap_runtime-0.6.5.dist-info}/top_level.txt +0 -0
tracdap/rt/_impl/storage.py
CHANGED
```diff
@@ -30,6 +30,7 @@ import tracdap.rt.exceptions as _ex
 import tracdap.rt.ext.plugins as plugins
 import tracdap.rt._impl.data as _data
 import tracdap.rt._impl.util as _util
+import tracdap.rt._impl.validation as _val
 
 # Import storage interfaces
 from tracdap.rt.ext.storage import *
@@ -41,7 +42,11 @@ class FormatManager:
     def get_data_format(cls, format_code: str, format_options: tp.Dict[str, tp.Any]) -> IDataFormat:
 
         try:
-
+
+            config = _cfg.PluginConfig(
+                protocol=format_code,
+                properties=format_options)
+
             return plugins.PluginManager.load_plugin(IDataFormat, config)
 
         except _ex.EPluginNotAvailable as e:
```
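For context, `get_data_format()` now resolves data formats through the same generic plugin mechanism as storage, by wrapping the format code and options in a `PluginConfig`. A minimal sketch of what that lookup amounts to; the "CSV" protocol and the property key are illustrative, not taken from this diff:

```python
import tracdap.rt.config as _cfg
import tracdap.rt.ext.plugins as plugins
from tracdap.rt.ext.storage import IDataFormat

# Sketch only: build a PluginConfig from a format code and its options,
# then load the matching IDataFormat plugin, as the new FormatManager does
config = _cfg.PluginConfig(
    protocol="CSV",                               # the caller's format_code
    properties={"lenient_csv_parser": "true"})    # the caller's format_options (illustrative key)

csv_format = plugins.PluginManager.load_plugin(IDataFormat, config)
```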
```diff
@@ -73,11 +78,18 @@ class StorageManager:
         self.__log = _util.logger_for_object(self)
         self.__file_storage: tp.Dict[str, IFileStorage] = dict()
         self.__data_storage: tp.Dict[str, IDataStorage] = dict()
+        self.__external: tp.List[str] = list()
         self.__settings = sys_config.storage
 
         for storage_key, storage_config in sys_config.storage.buckets.items():
             self.create_storage(storage_key, storage_config)
 
+        for storage_key, storage_config in sys_config.storage.external.items():
+            if storage_key in self.__file_storage or storage_key in self.__data_storage:
+                raise _ex.EConfig(f"Storage key [{storage_key}] is defined as both internal and external storage")
+            self.__external.append(storage_key)
+            self.create_storage(storage_key, storage_config)
+
     def default_storage_key(self):
         return self.__settings.defaultBucket
 
```
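The new loop reads an `external` section alongside `buckets` in the storage configuration. A rough sketch of how such a configuration might be constructed, assuming the generated config classes follow the field names referenced in the diff (`storage.buckets`, `storage.external`, `defaultBucket`); the storage keys, protocols and properties are purely illustrative:

```python
import tracdap.rt.config as cfg

# Illustrative only: one internal bucket plus one external storage location
sys_config = cfg.RuntimeConfig(storage=cfg.StorageConfig(
    buckets={
        "internal_data": cfg.PluginConfig(
            protocol="LOCAL", properties={"rootPath": "/opt/trac/data"})},
    external={
        "landing_zone": cfg.PluginConfig(
            protocol="LOCAL", properties={"rootPath": "/opt/trac/landing"})},
    defaultBucket="internal_data"))
```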
```diff
@@ -147,26 +159,32 @@ class StorageManager:
         self.__file_storage[storage_key] = file_storage
         self.__data_storage[storage_key] = data_storage
 
-    def has_file_storage(self, storage_key: str) -> bool:
+    def has_file_storage(self, storage_key: str, external: bool = False) -> bool:
+
+        if external ^ (storage_key in self.__external):
+            return False
 
         return storage_key in self.__file_storage
 
-    def get_file_storage(self, storage_key: str) -> IFileStorage:
+    def get_file_storage(self, storage_key: str, external: bool = False) -> IFileStorage:
 
-        if not self.has_file_storage(storage_key):
+        if not self.has_file_storage(storage_key, external):
             err = f"File storage is not configured for storage key [{storage_key}]"
             self.__log.error(err)
             raise _ex.EStorageConfig(err)
 
         return self.__file_storage[storage_key]
 
-    def has_data_storage(self, storage_key: str) -> bool:
+    def has_data_storage(self, storage_key: str, external: bool = False) -> bool:
+
+        if external ^ (storage_key in self.__external):
+            return False
 
        return storage_key in self.__data_storage
 
-    def get_data_storage(self, storage_key: str) -> IDataStorage:
+    def get_data_storage(self, storage_key: str, external: bool = False) -> IDataStorage:
 
-        if not self.has_data_storage(storage_key):
+        if not self.has_data_storage(storage_key, external):
             err = f"Data storage is not configured for storage key [{storage_key}]"
             self.__log.error(err)
             raise _ex.EStorageConfig(err)
```
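In effect, the XOR check makes a storage key visible only through the matching flag: keys from `storage.buckets` are internal, keys from `storage.external` are external. A quick sketch, given a `StorageManager` instance `storage_mgr` built from the illustrative config above:

```python
# Hypothetical keys: "internal_data" from storage.buckets, "landing_zone" from storage.external
storage_mgr.has_file_storage("internal_data")                 # True
storage_mgr.has_file_storage("internal_data", external=True)  # False, key is internal
storage_mgr.has_file_storage("landing_zone")                  # False, key is external
storage_mgr.has_file_storage("landing_zone", external=True)   # True

landing = storage_mgr.get_file_storage("landing_zone", external=True)
```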
```diff
@@ -587,29 +605,27 @@ class CommonFileStorage(IFileStorage):
 
         try:
 
-            if
+            if _val.StorageValidator.storage_path_is_empty(storage_path):
                 raise self._explicit_error(self.ExplicitError.STORAGE_PATH_NULL_OR_BLANK, operation_name, storage_path)
 
-            if
+            if _val.StorageValidator.storage_path_invalid(storage_path):
                 raise self._explicit_error(self.ExplicitError.STORAGE_PATH_INVALID, operation_name, storage_path)
-
-
-
-            if relative_path.is_absolute():
+
+            if _val.StorageValidator.storage_path_not_relative(storage_path):
                 raise self._explicit_error(self.ExplicitError.STORAGE_PATH_NOT_RELATIVE, operation_name, storage_path)
 
+            if _val.StorageValidator.storage_path_outside_root(storage_path):
+                raise self._explicit_error(self.ExplicitError.STORAGE_PATH_OUTSIDE_ROOT, operation_name, storage_path)
+
+            if not allow_root_dir and _val.StorageValidator.storage_path_is_root(storage_path):
+                raise self._explicit_error(self.ExplicitError.STORAGE_PATH_IS_ROOT, operation_name, storage_path)
+
             root_path = pathlib.Path("C:\\root") if _util.is_windows() else pathlib.Path("/root")
+            relative_path = pathlib.Path(storage_path)
             absolute_path = root_path.joinpath(relative_path).resolve(False)
 
             if absolute_path == root_path:
-
-                raise self._explicit_error(self.ExplicitError.STORAGE_PATH_IS_ROOT, operation_name, storage_path)
-            else:
-                return ""
-
-            # is_relative_to only supported in Python 3.9+, we need to support 3.7
-            if root_path not in absolute_path.parents:
-                raise self._explicit_error(self.ExplicitError.STORAGE_PATH_OUTSIDE_ROOT, operation_name, storage_path)
+                return ""
             else:
                 return absolute_path.relative_to(root_path).as_posix()
 
@@ -639,10 +655,6 @@ class CommonFileStorage(IFileStorage):
 
         return err
 
-    _ILLEGAL_PATH_CHARS_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
-    _ILLEGAL_PATH_CHARS_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")
-    _ILLEGAL_PATH_CHARS = _ILLEGAL_PATH_CHARS_WINDOWS if _util.is_windows() else _ILLEGAL_PATH_CHARS_POSIX
-
     class ExplicitError(enum.Enum):
 
         # Validation failures
```
tracdap/rt/_impl/validation.py
CHANGED
```diff
@@ -15,7 +15,9 @@
 import inspect
 import logging
 import re
+import types
 import typing as tp
+import pathlib
 
 import tracdap.rt.metadata as meta
 import tracdap.rt.exceptions as ex
@@ -25,6 +27,11 @@ import tracdap.rt._impl.util as util
 from tracdap.rt.api.hook import _Named  # noqa
 
 
+def require_package(module_name: str, module_obj: types.ModuleType):
+    if module_obj is None:
+        raise ex.ERuntimeValidation(f"Optional package [{module_name}] is not installed")
+
+
 def validate_signature(method: tp.Callable, *args, **kwargs):
     _TypeValidator.validate_signature(method, *args, **kwargs)
 
```
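The new `require_package()` helper gives optional dependencies a uniform failure mode. A small usage sketch; the optional-import pattern and the choice of `polars` are illustrative, not taken from the diff:

```python
import tracdap.rt._impl.validation as _val

# Optional dependency: fall back to None if the package is missing
try:
    import polars
except ImportError:
    polars = None

def load_with_polars(path: str):
    # Raises ERuntimeValidation with a clear message if polars is not installed
    _val.require_package("polars", polars)
    return polars.read_csv(path)
```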
```diff
@@ -38,7 +45,7 @@ def check_type(expected_type: tp.Type, value: tp.Any) -> bool:
 
 
 def quick_validate_model_def(model_def: meta.ModelDefinition):
-
+    StaticValidator.quick_validate_model_def(model_def)
 
 
 class _TypeValidator:
@@ -62,15 +69,19 @@ class _TypeValidator:
             signature = inspect.signature(method)
             cls.__method_cache[method.__name__] = signature
 
+        hints = tp.get_type_hints(method)
+
         positional_index = 0
 
         for param_name, param in signature.parameters.items():
 
+            param_type = hints.get(param_name)
+
             values = cls._select_arg(method.__name__, param, positional_index, *args, **kwargs)
             positional_index += len(values)
 
             for value in values:
-                cls._validate_arg(method.__name__,
+                cls._validate_arg(method.__name__, param_name, param_type, value)
 
     @classmethod
     def validate_return_type(cls, method: tp.Callable, value: tp.Any):
```
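Resolving `tp.get_type_hints(method)` up front gives the validator real type objects for each parameter name, which is what enables the per-parameter check and the improved error message below. A minimal illustration of the call (the function here is hypothetical):

```python
import typing as tp

def example_api_call(dataset_name: str, batch_size: tp.Optional[int] = None) -> dict:
    ...

tp.get_type_hints(example_api_call)
# {'dataset_name': <class 'str'>, 'batch_size': typing.Optional[int], 'return': <class 'dict'>}
```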
```diff
@@ -146,14 +157,18 @@
             raise ex.EUnexpected("Invalid method signature in runtime API (this is a bug)")
 
     @classmethod
-    def _validate_arg(cls, method_name: str,
+    def _validate_arg(cls, method_name: str, param_name: str, param_type: tp.Type, value: tp.Any):
 
-        if not cls._validate_type(
+        if not cls._validate_type(param_type, value):
 
-            expected_type = cls._type_name(
+            expected_type = cls._type_name(param_type)
             actual_type = cls._type_name(type(value)) if value is not None else str(None)
 
-
+            if expected_type == actual_type:
+                expected_type = cls._type_name(param_type, qualified=True)
+                actual_type = cls._type_name(type(value), qualified=True)
+
+            err = f"Invalid API call [{method_name}()]: Wrong type for [{param_name}]" \
                 + f" (expected [{expected_type}], got [{actual_type}])"
 
             cls._log.error(err)
@@ -210,7 +225,7 @@ class _TypeValidator:
         return isinstance(value, expected_type)
 
     @classmethod
-    def _type_name(cls, type_var: tp.Type) -> str:
+    def _type_name(cls, type_var: tp.Type, qualified: bool = False) -> str:
 
         if isinstance(type_var, cls.__generic_metaclass):
 
@@ -230,10 +245,13 @@ class _TypeValidator:
 
             raise ex.ETracInternal(f"Validation of [{origin.__name__}] generic parameters is not supported yet")
 
-
+        if qualified:
+            return f"{type_var.__module__}.{type_var.__name__}"
+        else:
+            return type_var.__name__
 
 
-class
+class StaticValidator:
 
     __identifier_pattern = re.compile("\\A[a-zA-Z_]\\w*\\Z", re.ASCII)
     __reserved_identifier_pattern = re.compile("\\A(_|trac_)", re.ASCII)
```
```diff
@@ -301,6 +319,28 @@ class _StaticValidator:
         cls._check_inputs_or_outputs(model_def.inputs)
         cls._check_inputs_or_outputs(model_def.outputs)
 
+    @classmethod
+    def quick_validate_schema(cls, schema: meta.SchemaDefinition):
+
+        if schema.schemaType != meta.SchemaType.TABLE:
+            cls._fail(f"Unsupported schema type [{schema.schemaType}]")
+
+        if schema.partType != meta.PartType.PART_ROOT:
+            cls._fail(f"Unsupported partition type [{schema.partType}]")
+
+        if schema.table is None or schema.table.fields is None or len(schema.table.fields) == 0:
+            cls._fail(f"Table schema does not define any fields")
+
+        fields = schema.table.fields
+        field_names = list(map(lambda f: f.fieldName, fields))
+        property_type = f"field"
+
+        cls._valid_identifiers(field_names, property_type)
+        cls._case_insensitive_duplicates(field_names, property_type)
+
+        for field in fields:
+            cls._check_single_field(field, property_type)
+
     @classmethod
     def _check_label(cls, label, param_name):
         if label is not None:
```
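For reference, a schema that should satisfy `quick_validate_schema` can be built from the metadata classes the validator checks against. This is a hedged sketch; the field definitions are illustrative and the exact constructor arguments may differ:

```python
import tracdap.rt.metadata as meta

# Illustrative table schema: TABLE type, root partition, at least one valid field
schema = meta.SchemaDefinition(
    schemaType=meta.SchemaType.TABLE,
    partType=meta.PartType.PART_ROOT,
    table=meta.TableSchema(fields=[
        meta.FieldSchema(fieldName="customer_id", fieldType=meta.BasicType.STRING, businessKey=True),
        meta.FieldSchema(fieldName="balance", fieldType=meta.BasicType.DECIMAL)]))
```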
```diff
@@ -330,10 +370,20 @@ class _StaticValidator:
 
             cls._log.info(f"Checking {input_name}")
 
+            if input_schema.dynamic:
+                if input_schema.schema and input_schema.schema.table:
+                    error = "Dynamic schemas must have schema.table = None"
+                    cls._fail(f"Invalid schema for [{input_name}]: {error}")
+                else:
+                    continue
+
             fields = input_schema.schema.table.fields
             field_names = list(map(lambda f: f.fieldName, fields))
             property_type = f"field in [{input_name}]"
 
+            if len(fields) == 0:
+                cls._fail(f"Invalid schema for [{input_name}]: No fields defined")
+
             cls._valid_identifiers(field_names, property_type)
             cls._case_insensitive_duplicates(field_names, property_type)
 
```
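In other words, an input marked as dynamic skips the field-level checks entirely, and it is an error for it to carry a concrete `schema.table`. A hedged sketch of what the validator now accepts, assuming `ModelInputSchema` gained a `dynamic` flag as the hook and metadata changes in this release suggest:

```python
import tracdap.rt.metadata as meta

# Accepted: a dynamic input leaves schema.table unset, so field checks are skipped
dynamic_input = meta.ModelInputSchema(
    dynamic=True,
    schema=meta.SchemaDefinition(schemaType=meta.SchemaType.TABLE))

# Rejected: dynamic=True combined with a populated schema.table triggers
# "Dynamic schemas must have schema.table = None"
```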
```diff
@@ -375,8 +425,9 @@ class _StaticValidator:
         if field.categorical and field.fieldType != meta.BasicType.STRING:
             cls._fail(f"Invalid {property_type}: [{field.fieldName}] fieldType {field.fieldType} used as categorical")
 
-
-
+        # Do not require notNull = True for business keys here
+        # Instead setting businessKey = True will cause notNull = True to be set during normalization
+        # This agrees with the semantics in platform API and CSV schema loader
 
     @classmethod
     def _valid_identifiers(cls, keys, property_type):
@@ -425,3 +476,54 @@ class _StaticValidator:
     def _fail(cls, message: str):
         cls._log.error(message)
         raise ex.EModelValidation(message)
+
+
+class StorageValidator:
+
+    __ILLEGAL_PATH_CHARS_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
+    __ILLEGAL_PATH_CHARS_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")
+    __ILLEGAL_PATH_CHARS = __ILLEGAL_PATH_CHARS_WINDOWS if util.is_windows() else __ILLEGAL_PATH_CHARS_POSIX
+
+    @classmethod
+    def storage_path_is_empty(cls, storage_path: str):
+
+        return storage_path is None or len(storage_path.strip()) == 0
+
+    @classmethod
+    def storage_path_invalid(cls, storage_path: str):
+
+        if cls.__ILLEGAL_PATH_CHARS.match(storage_path):
+            return True
+
+        try:
+            # Make sure the path can be interpreted as a path
+            pathlib.Path(storage_path)
+            return False
+        except ValueError:
+            return True
+
+    @classmethod
+    def storage_path_not_relative(cls, storage_path: str):
+
+        relative_path = pathlib.Path(storage_path)
+        return relative_path.is_absolute()
+
+    @classmethod
+    def storage_path_outside_root(cls, storage_path: str):
+
+        # is_relative_to only supported in Python 3.9+, we need to support 3.8
+
+        root_path = pathlib.Path("C:\\root") if util.is_windows() else pathlib.Path("/root")
+        relative_path = pathlib.Path(storage_path)
+        absolute_path = root_path.joinpath(relative_path).resolve(False)
+
+        return root_path != absolute_path and root_path not in absolute_path.parents
+
+    @classmethod
+    def storage_path_is_root(cls, storage_path: str):
+
+        root_path = pathlib.Path("C:\\root") if util.is_windows() else pathlib.Path("/root")
+        relative_path = pathlib.Path(storage_path)
+        absolute_path = root_path.joinpath(relative_path).resolve(False)
+
+        return root_path == absolute_path
```
tracdap/rt/_plugins/repo_git.py
CHANGED
tracdap/rt/_version.py
CHANGED
tracdap/rt/api/experimental.py
ADDED
```diff
@@ -0,0 +1,220 @@
+# Copyright 2024 Accenture Global Solutions Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc as _abc
+import dataclasses as _dc
+import datetime as _dt
+import enum as _enum
+import typing as _tp
+
+from tracdap.rt.api import *
+from .hook import _StaticApiHook
+
+
+_DATA_FRAMEWORK = _tp.TypeVar('_DATA_FRAMEWORK')
+
+
+class _DataFramework(_tp.Generic[_DATA_FRAMEWORK]):
+
+    PANDAS: "_DataFramework"
+    POLARS: "_DataFramework"
+
+    def __init__(self, framework_name, framework_type: _DATA_FRAMEWORK):
+        self.__framework_name = framework_name
+        self.__framework_type = framework_type
+
+    def __str__(self):
+        return self.__framework_name
+
+
+if _tp.TYPE_CHECKING:
+
+    if pandas:
+        _DataFramework.PANDAS = _DataFramework('pandas', pandas.DataFrame)
+        """The original Python dataframe library, most widely used"""
+    else:
+        _DataFramework.PANDAS = _DataFramework('pandas', None)
+        """Pandas data framework is not installed"""
+
+    if polars:
+        _DataFramework.POLARS = _DataFramework('polars', polars.DataFrame)
+        """A modern, fast and simple alternative to Pandas"""
+    else:
+        _DataFramework.POLARS = _DataFramework('polars', None)
+        """Polars data framework is not installed"""
+
+else:
+
+    _DataFramework.PANDAS = _DataFramework('pandas', None)
+    _DataFramework.POLARS = _DataFramework('polars', None)
+
+PANDAS = _DataFramework.PANDAS
+POLARS = _DataFramework.POLARS
+
+
+class TracContext(TracContext):
+
+    @_abc.abstractmethod
+    def get_table(self, dataset_name: str, framework: _DataFramework[_DATA_FRAMEWORK]) -> _DATA_FRAMEWORK:
+
+        pass
+
+    @_abc.abstractmethod
+    def put_table(self, dataset_name: str, dataset: _DATA_FRAMEWORK):
+
+        pass
+
+
+def init_static():
+    import tracdap.rt._impl.static_api as _static_impl  # noqa
+    _static_impl.StaticApiImpl.register_impl()
+
+
+def infer_schema(dataset: _tp.Any) -> SchemaDefinition:
+    sa = _StaticApiHook.get_instance()
+    return sa.infer_schema(dataset)
+
+
+class FileType(_enum.Enum):
+
+    FILE = 1
+    DIRECTORY = 2
+
+
+@_dc.dataclass
+class FileStat:
+
+    """
+    Dataclass to represent some basic file stat info independent of the storage technology used
+    I.e. do not depend on Python stat_result class that refers to locally-mounted filesystems
+    Timestamps are held in UTC
+    """
+
+    file_name: str
+    file_type: FileType
+    storage_path: str
+    size: int
+
+    mtime: _tp.Optional[_dt.datetime] = None
+    atime: _tp.Optional[_dt.datetime] = None
+
+
+class TracFileStorage:
+
+    @_abc.abstractmethod
+    def get_storage_key(self) -> str:
+        pass
+
+    @_abc.abstractmethod
+    def exists(self, storage_path: str) -> bool:
+        """The exists method can be used for both files and directories"""
+        pass
+
+    @_abc.abstractmethod
+    def size(self, storage_path: str) -> int:
+        """The rm method only works on regular files, it cannot be used for directories"""
+        pass
+
+    @_abc.abstractmethod
+    def stat(self, storage_path: str) -> FileStat:
+        """The stat method can be used for both files and directories, so long as they exist"""
+        pass
+
+    @_abc.abstractmethod
+    def ls(self, storage_path: str, recursive: bool = False) -> _tp.List[FileStat]:
+        """The ls method only works on directories, it cannot be used for regular files"""
+        pass
+
+    @_abc.abstractmethod
+    def mkdir(self, storage_path: str, recursive: bool = False):
+        """The mkdir method will succeed silently if the directory already exists"""
+        pass
+
+    @_abc.abstractmethod
+    def rm(self, storage_path: str):
+        """The rm method only works on regular files, it cannot be used for directories and is not recursive"""
+        pass
+
+    @_abc.abstractmethod
+    def rmdir(self, storage_path: str):
+        """The rmdir method only works on directories and is always recursive"""
+        pass
+
+    @_abc.abstractmethod
+    def read_byte_stream(self, storage_path: str) -> _tp.ContextManager[_tp.BinaryIO]:
+        """The read_byte_stream method only works for existing files"""
+        pass
+
+    @_abc.abstractmethod
+    def write_byte_stream(self, storage_path: str) -> _tp.ContextManager[_tp.BinaryIO]:
+        """The write_byte_stream method will always overwrite an existing file if it exists"""
+        pass
+
+    def read_bytes(self, storage_path: str) -> bytes:
+        """The read_bytes method only works for existing files"""
+        with self.read_byte_stream(storage_path) as stream:
+            return stream.read()
+
+    def write_bytes(self, storage_path: str, data: bytes):
+        """The write_bytes method will always overwrite an existing file if it exists"""
+        with self.write_byte_stream(storage_path) as stream:
+            stream.write(data)
+
+
+
+
+class TracDataContext(TracContext):
+
+    @_abc.abstractmethod
+    def get_file_storage(self, storage_key: str) -> TracFileStorage:
+        pass
+
+    @_abc.abstractmethod
+    def get_data_storage(self, storage_key: str) -> None:
+        pass
+
+    @_abc.abstractmethod
+    def add_data_import(self, dataset_key: str):
+        pass
+
+    @_abc.abstractmethod
+    def set_source_metadata(self, dataset_key: str, storage_key: str, source_info: FileStat):
+        pass
+
+    @_abc.abstractmethod
+    def set_attribute(self, dataset_key: str, attribute_name: str, value: _tp.Any):
+        pass
+
+    @_abc.abstractmethod
+    def set_schema(self, dataset_key: str, schema: SchemaDefinition):
+        pass
+
+
+class TracDataImport(TracModel):
+
+    def define_inputs(self) -> _tp.Dict[str, ModelInputSchema]:
+        return dict()
+
+    @_abc.abstractmethod
+    def run_model(self, ctx: TracDataContext):
+        pass
+
+
+class TracDataExport(TracModel):
+
+    def define_outputs(self) -> _tp.Dict[str, ModelOutputSchema]:
+        return dict()
+
+    @_abc.abstractmethod
+    def run_model(self, ctx: TracDataContext):
+        pass
```
tracdap/rt/api/hook.py
CHANGED
```diff
@@ -118,11 +118,14 @@ class _StaticApiHook:
 
         pass
 
+    @_abc.abstractmethod
+    def infer_schema(self, dataset: _tp.Any) -> _meta.SchemaDefinition:
+        pass
+
     @_abc.abstractmethod
     def define_input_table(
             self, *fields: _tp.Union[_meta.FieldSchema, _tp.List[_meta.FieldSchema]],
-            label: _tp.Optional[str] = None,
-            optional: bool = False,
+            label: _tp.Optional[str] = None, optional: bool = False, dynamic: bool = False,
             input_props: _tp.Optional[_tp.Dict[str, _tp.Any]] = None) \
             -> _meta.ModelInputSchema:
 
@@ -131,8 +134,7 @@ class _StaticApiHook:
     @_abc.abstractmethod
     def define_output_table(
             self, *fields: _tp.Union[_meta.FieldSchema, _tp.List[_meta.FieldSchema]],
-            label: _tp.Optional[str] = None,
-            optional: bool = False,
+            label: _tp.Optional[str] = None, optional: bool = False, dynamic: bool = False,
             output_props: _tp.Optional[_tp.Dict[str, _tp.Any]] = None) \
             -> _meta.ModelOutputSchema:
 
```
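The new `dynamic` flag flows through to `define_input_table()` / `define_output_table()` in the static API. A hedged sketch of declaring a dynamic input, whose concrete schema is then resolved at runtime; the model, dataset and field names are illustrative:

```python
import typing as tp
import tracdap.rt.api as trac

class GenericProfiler(trac.TracModel):

    def define_parameters(self) -> tp.Dict[str, trac.ModelParameter]:
        return {}

    def define_inputs(self) -> tp.Dict[str, trac.ModelInputSchema]:
        # No fields are listed: the input schema is supplied when the job runs
        return {"source_data": trac.define_input_table(dynamic=True, label="Any tabular input")}

    def define_outputs(self) -> tp.Dict[str, trac.ModelOutputSchema]:
        return {"profile": trac.define_output_table(
            trac.define_field("column_name", trac.BasicType.STRING, label="Column name"),
            trac.define_field("null_count", trac.BasicType.INTEGER, label="Null count"))}

    def run_model(self, ctx: trac.TracContext):
        source = ctx.get_pandas_table("source_data")
        ...
```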