tracdap-runtime 0.6.4__py3-none-any.whl → 0.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. tracdap/rt/_exec/context.py +382 -29
  2. tracdap/rt/_exec/dev_mode.py +123 -94
  3. tracdap/rt/_exec/engine.py +120 -9
  4. tracdap/rt/_exec/functions.py +125 -20
  5. tracdap/rt/_exec/graph.py +38 -13
  6. tracdap/rt/_exec/graph_builder.py +120 -9
  7. tracdap/rt/_impl/data.py +115 -49
  8. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +74 -30
  9. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +120 -2
  10. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +12 -10
  11. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -2
  12. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +29 -0
  13. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +16 -0
  14. tracdap/rt/_impl/models.py +8 -0
  15. tracdap/rt/_impl/static_api.py +16 -0
  16. tracdap/rt/_impl/storage.py +37 -25
  17. tracdap/rt/_impl/validation.py +76 -7
  18. tracdap/rt/_plugins/repo_git.py +1 -1
  19. tracdap/rt/_version.py +1 -1
  20. tracdap/rt/api/experimental.py +220 -0
  21. tracdap/rt/api/hook.py +4 -0
  22. tracdap/rt/api/model_api.py +48 -6
  23. tracdap/rt/config/__init__.py +2 -2
  24. tracdap/rt/config/common.py +6 -0
  25. tracdap/rt/metadata/__init__.py +25 -20
  26. tracdap/rt/metadata/job.py +54 -0
  27. tracdap/rt/metadata/model.py +18 -0
  28. tracdap/rt/metadata/resource.py +24 -0
  29. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/METADATA +3 -1
  30. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/RECORD +33 -29
  31. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/LICENSE +0 -0
  32. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/WHEEL +0 -0
  33. {tracdap_runtime-0.6.4.dist-info → tracdap_runtime-0.6.5.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,22 @@
1
1
  from tracdap.rt._impl.grpc.tracdap.metadata import type_pb2 as _type_pb2
2
2
  from tracdap.rt._impl.grpc.tracdap.metadata import data_pb2 as _data_pb2
3
3
  from google.protobuf.internal import containers as _containers
4
+ from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
4
5
  from google.protobuf import descriptor as _descriptor
5
6
  from google.protobuf import message as _message
6
7
  from typing import ClassVar as _ClassVar, Mapping as _Mapping, Optional as _Optional, Union as _Union
7
8
 
8
9
  DESCRIPTOR: _descriptor.FileDescriptor
9
10
 
11
+ class ModelType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
12
+ __slots__ = ()
13
+ STANDARD_MODEL: _ClassVar[ModelType]
14
+ DATA_IMPORT_MODEL: _ClassVar[ModelType]
15
+ DATA_EXPORT_MODEL: _ClassVar[ModelType]
16
+ STANDARD_MODEL: ModelType
17
+ DATA_IMPORT_MODEL: ModelType
18
+ DATA_EXPORT_MODEL: ModelType
19
+
10
20
  class ModelParameter(_message.Message):
11
21
  __slots__ = ("paramType", "label", "defaultValue", "paramProps")
12
22
  class ParamPropsEntry(_message.Message):
@@ -69,7 +79,7 @@ class ModelOutputSchema(_message.Message):
69
79
  def __init__(self, schema: _Optional[_Union[_data_pb2.SchemaDefinition, _Mapping]] = ..., label: _Optional[str] = ..., optional: bool = ..., dynamic: bool = ..., outputProps: _Optional[_Mapping[str, _type_pb2.Value]] = ...) -> None: ...
70
80
 
71
81
  class ModelDefinition(_message.Message):
72
- __slots__ = ("language", "repository", "packageGroup", "package", "version", "entryPoint", "path", "parameters", "inputs", "outputs", "staticAttributes")
82
+ __slots__ = ("language", "repository", "packageGroup", "package", "version", "entryPoint", "path", "parameters", "inputs", "outputs", "staticAttributes", "modelType")
73
83
  class ParametersEntry(_message.Message):
74
84
  __slots__ = ("key", "value")
75
85
  KEY_FIELD_NUMBER: _ClassVar[int]
@@ -109,6 +119,7 @@ class ModelDefinition(_message.Message):
109
119
  INPUTS_FIELD_NUMBER: _ClassVar[int]
110
120
  OUTPUTS_FIELD_NUMBER: _ClassVar[int]
111
121
  STATICATTRIBUTES_FIELD_NUMBER: _ClassVar[int]
122
+ MODELTYPE_FIELD_NUMBER: _ClassVar[int]
112
123
  language: str
113
124
  repository: str
114
125
  packageGroup: str
@@ -120,4 +131,5 @@ class ModelDefinition(_message.Message):
120
131
  inputs: _containers.MessageMap[str, ModelInputSchema]
121
132
  outputs: _containers.MessageMap[str, ModelOutputSchema]
122
133
  staticAttributes: _containers.MessageMap[str, _type_pb2.Value]
123
- def __init__(self, language: _Optional[str] = ..., repository: _Optional[str] = ..., packageGroup: _Optional[str] = ..., package: _Optional[str] = ..., version: _Optional[str] = ..., entryPoint: _Optional[str] = ..., path: _Optional[str] = ..., parameters: _Optional[_Mapping[str, ModelParameter]] = ..., inputs: _Optional[_Mapping[str, ModelInputSchema]] = ..., outputs: _Optional[_Mapping[str, ModelOutputSchema]] = ..., staticAttributes: _Optional[_Mapping[str, _type_pb2.Value]] = ...) -> None: ...
134
+ modelType: ModelType
135
+ def __init__(self, language: _Optional[str] = ..., repository: _Optional[str] = ..., packageGroup: _Optional[str] = ..., package: _Optional[str] = ..., version: _Optional[str] = ..., entryPoint: _Optional[str] = ..., path: _Optional[str] = ..., parameters: _Optional[_Mapping[str, ModelParameter]] = ..., inputs: _Optional[_Mapping[str, ModelInputSchema]] = ..., outputs: _Optional[_Mapping[str, ModelOutputSchema]] = ..., staticAttributes: _Optional[_Mapping[str, _type_pb2.Value]] = ..., modelType: _Optional[_Union[ModelType, str]] = ...) -> None: ...
@@ -0,0 +1,29 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
3
+ # source: tracdap/rt/_impl/grpc/tracdap/metadata/resource.proto
4
+ # Protobuf Python Version: 4.25.3
5
+ """Generated protocol buffer code."""
6
+ from google.protobuf import descriptor as _descriptor
7
+ from google.protobuf import descriptor_pool as _descriptor_pool
8
+ from google.protobuf import symbol_database as _symbol_database
9
+ from google.protobuf.internal import builder as _builder
10
+ # @@protoc_insertion_point(imports)
11
+
12
+ _sym_db = _symbol_database.Default()
13
+
14
+
15
+ from tracdap.rt._impl.grpc.tracdap.metadata import object_id_pb2 as tracdap_dot_rt_dot___impl_dot_grpc_dot_tracdap_dot_metadata_dot_object__id__pb2
16
+ from tracdap.rt._impl.grpc.tracdap.metadata import object_pb2 as tracdap_dot_rt_dot___impl_dot_grpc_dot_tracdap_dot_metadata_dot_object__pb2
17
+
18
+
19
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n5tracdap/rt/_impl/grpc/tracdap/metadata/resource.proto\x12\x10tracdap.metadata\x1a\x36tracdap/rt/_impl/grpc/tracdap/metadata/object_id.proto\x1a\x33tracdap/rt/_impl/grpc/tracdap/metadata/object.proto*m\n\x0cResourceType\x12\x19\n\x15RESOURCE_TYPE_NOT_SET\x10\x00\x12\x14\n\x10MODEL_REPOSITORY\x10\x01\x12\x14\n\x10INTERNAL_STORAGE\x10\x02\"\x04\x08\x03\x10\x03*\x10\x45XTERNAL_STORAGEB\x1e\n\x1aorg.finos.tracdap.metadataP\x01\x62\x06proto3')
20
+
21
+ _globals = globals()
22
+ _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
23
+ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'tracdap.rt._impl.grpc.tracdap.metadata.resource_pb2', _globals)
24
+ if _descriptor._USE_C_DESCRIPTORS == False:
25
+ _globals['DESCRIPTOR']._options = None
26
+ _globals['DESCRIPTOR']._serialized_options = b'\n\032org.finos.tracdap.metadataP\001'
27
+ _globals['_RESOURCETYPE']._serialized_start=184
28
+ _globals['_RESOURCETYPE']._serialized_end=293
29
+ # @@protoc_insertion_point(module_scope)
@@ -0,0 +1,16 @@
1
+ from tracdap.rt._impl.grpc.tracdap.metadata import object_id_pb2 as _object_id_pb2
2
+ from tracdap.rt._impl.grpc.tracdap.metadata import object_pb2 as _object_pb2
3
+ from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
4
+ from google.protobuf import descriptor as _descriptor
5
+ from typing import ClassVar as _ClassVar
6
+
7
+ DESCRIPTOR: _descriptor.FileDescriptor
8
+
9
+ class ResourceType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
10
+ __slots__ = ()
11
+ RESOURCE_TYPE_NOT_SET: _ClassVar[ResourceType]
12
+ MODEL_REPOSITORY: _ClassVar[ResourceType]
13
+ INTERNAL_STORAGE: _ClassVar[ResourceType]
14
+ RESOURCE_TYPE_NOT_SET: ResourceType
15
+ MODEL_REPOSITORY: ResourceType
16
+ INTERNAL_STORAGE: ResourceType
@@ -19,6 +19,7 @@ import pathlib
19
19
  import copy
20
20
 
21
21
  import tracdap.rt.api as _api
22
+ import tracdap.rt.api.experimental as _eapi
22
23
  import tracdap.rt.metadata as _meta
23
24
  import tracdap.rt.config as _cfg
24
25
  import tracdap.rt.exceptions as _ex
@@ -208,6 +209,13 @@ class ModelLoader:
208
209
  model_def.inputs = inputs
209
210
  model_def.outputs = outputs
210
211
 
212
+ if isinstance(model, _eapi.TracDataImport):
213
+ model_def.modelType = _meta.ModelType.DATA_IMPORT_MODEL
214
+ elif isinstance(model, _eapi.TracDataExport):
215
+ model_def.modelType = _meta.ModelType.DATA_EXPORT_MODEL
216
+ else:
217
+ model_def.modelType = _meta.ModelType.STANDARD_MODEL
218
+
211
219
  _val.quick_validate_model_def(model_def)
212
220
 
213
221
  for attr_name, attr_value in attributes.items():
@@ -17,6 +17,7 @@ import types as _ts
17
17
 
18
18
  import tracdap.rt.metadata as _meta
19
19
  import tracdap.rt.exceptions as _ex
20
+ import tracdap.rt._impl.data as _data
20
21
  import tracdap.rt._impl.schemas as _schemas
21
22
  import tracdap.rt._impl.type_system as _type_system
22
23
  import tracdap.rt._impl.validation as _val
@@ -152,6 +153,21 @@ class StaticApiImpl(_StaticApiHook):
152
153
 
153
154
  return _schemas.SchemaLoader.load_schema(package, schema_file)
154
155
 
156
+ def infer_schema(self, dataset: _tp.Any) -> _meta.SchemaDefinition:
157
+
158
+ if _data.pandas and isinstance(dataset, _data.pandas.DataFrame):
159
+ arrow_schema = _data.DataMapping.pandas_to_arrow_schema(dataset)
160
+
161
+ elif _data.polars and isinstance(dataset, _data.polars.DataFrame):
162
+ arrow_schema = _data.DataMapping.polars_to_arrow_schema(dataset)
163
+
164
+ else:
165
+ dataset_type = f"{type(dataset).__module__}.{type(dataset).__name__}"
166
+ message = f"Schema inference is not available for dataset type [{dataset_type}]"
167
+ raise _ex.ERuntimeValidation(message)
168
+
169
+ return _data.DataMapping.arrow_to_trac_schema(arrow_schema)
170
+
155
171
  def define_input_table(
156
172
  self, *fields: _tp.Union[_meta.FieldSchema, _tp.List[_meta.FieldSchema]],
157
173
  label: _tp.Optional[str] = None, optional: bool = False, dynamic: bool = False,
@@ -30,6 +30,7 @@ import tracdap.rt.exceptions as _ex
30
30
  import tracdap.rt.ext.plugins as plugins
31
31
  import tracdap.rt._impl.data as _data
32
32
  import tracdap.rt._impl.util as _util
33
+ import tracdap.rt._impl.validation as _val
33
34
 
34
35
  # Import storage interfaces
35
36
  from tracdap.rt.ext.storage import *
@@ -41,7 +42,11 @@ class FormatManager:
41
42
  def get_data_format(cls, format_code: str, format_options: tp.Dict[str, tp.Any]) -> IDataFormat:
42
43
 
43
44
  try:
44
- config = _cfg.PluginConfig(format_code, format_options)
45
+
46
+ config = _cfg.PluginConfig(
47
+ protocol=format_code,
48
+ properties=format_options)
49
+
45
50
  return plugins.PluginManager.load_plugin(IDataFormat, config)
46
51
 
47
52
  except _ex.EPluginNotAvailable as e:
@@ -73,11 +78,18 @@ class StorageManager:
73
78
  self.__log = _util.logger_for_object(self)
74
79
  self.__file_storage: tp.Dict[str, IFileStorage] = dict()
75
80
  self.__data_storage: tp.Dict[str, IDataStorage] = dict()
81
+ self.__external: tp.List[str] = list()
76
82
  self.__settings = sys_config.storage
77
83
 
78
84
  for storage_key, storage_config in sys_config.storage.buckets.items():
79
85
  self.create_storage(storage_key, storage_config)
80
86
 
87
+ for storage_key, storage_config in sys_config.storage.external.items():
88
+ if storage_key in self.__file_storage or storage_key in self.__data_storage:
89
+ raise _ex.EConfig(f"Storage key [{storage_key}] is defined as both internal and external storage")
90
+ self.__external.append(storage_key)
91
+ self.create_storage(storage_key, storage_config)
92
+
81
93
  def default_storage_key(self):
82
94
  return self.__settings.defaultBucket
83
95
 
@@ -147,26 +159,32 @@ class StorageManager:
147
159
  self.__file_storage[storage_key] = file_storage
148
160
  self.__data_storage[storage_key] = data_storage
149
161
 
150
- def has_file_storage(self, storage_key: str) -> bool:
162
+ def has_file_storage(self, storage_key: str, external: bool = False) -> bool:
163
+
164
+ if external ^ (storage_key in self.__external):
165
+ return False
151
166
 
152
167
  return storage_key in self.__file_storage
153
168
 
154
- def get_file_storage(self, storage_key: str) -> IFileStorage:
169
+ def get_file_storage(self, storage_key: str, external: bool = False) -> IFileStorage:
155
170
 
156
- if not self.has_file_storage(storage_key):
171
+ if not self.has_file_storage(storage_key, external):
157
172
  err = f"File storage is not configured for storage key [{storage_key}]"
158
173
  self.__log.error(err)
159
174
  raise _ex.EStorageConfig(err)
160
175
 
161
176
  return self.__file_storage[storage_key]
162
177
 
163
- def has_data_storage(self, storage_key: str) -> bool:
178
+ def has_data_storage(self, storage_key: str, external: bool = False) -> bool:
179
+
180
+ if external ^ (storage_key in self.__external):
181
+ return False
164
182
 
165
183
  return storage_key in self.__data_storage
166
184
 
167
- def get_data_storage(self, storage_key: str) -> IDataStorage:
185
+ def get_data_storage(self, storage_key: str, external: bool = False) -> IDataStorage:
168
186
 
169
- if not self.has_data_storage(storage_key):
187
+ if not self.has_data_storage(storage_key, external):
170
188
  err = f"Data storage is not configured for storage key [{storage_key}]"
171
189
  self.__log.error(err)
172
190
  raise _ex.EStorageConfig(err)
@@ -587,29 +605,27 @@ class CommonFileStorage(IFileStorage):
587
605
 
588
606
  try:
589
607
 
590
- if storage_path is None or len(storage_path.strip()) == 0:
608
+ if _val.StorageValidator.storage_path_is_empty(storage_path):
591
609
  raise self._explicit_error(self.ExplicitError.STORAGE_PATH_NULL_OR_BLANK, operation_name, storage_path)
592
610
 
593
- if self._ILLEGAL_PATH_CHARS.match(storage_path):
611
+ if _val.StorageValidator.storage_path_invalid(storage_path):
594
612
  raise self._explicit_error(self.ExplicitError.STORAGE_PATH_INVALID, operation_name, storage_path)
595
-
596
- relative_path = pathlib.Path(storage_path)
597
-
598
- if relative_path.is_absolute():
613
+
614
+ if _val.StorageValidator.storage_path_not_relative(storage_path):
599
615
  raise self._explicit_error(self.ExplicitError.STORAGE_PATH_NOT_RELATIVE, operation_name, storage_path)
600
616
 
617
+ if _val.StorageValidator.storage_path_outside_root(storage_path):
618
+ raise self._explicit_error(self.ExplicitError.STORAGE_PATH_OUTSIDE_ROOT, operation_name, storage_path)
619
+
620
+ if not allow_root_dir and _val.StorageValidator.storage_path_is_root(storage_path):
621
+ raise self._explicit_error(self.ExplicitError.STORAGE_PATH_IS_ROOT, operation_name, storage_path)
622
+
601
623
  root_path = pathlib.Path("C:\\root") if _util.is_windows() else pathlib.Path("/root")
624
+ relative_path = pathlib.Path(storage_path)
602
625
  absolute_path = root_path.joinpath(relative_path).resolve(False)
603
626
 
604
627
  if absolute_path == root_path:
605
- if not allow_root_dir:
606
- raise self._explicit_error(self.ExplicitError.STORAGE_PATH_IS_ROOT, operation_name, storage_path)
607
- else:
608
- return ""
609
-
610
- # is_relative_to only supported in Python 3.9+, we need to support 3.7
611
- if root_path not in absolute_path.parents:
612
- raise self._explicit_error(self.ExplicitError.STORAGE_PATH_OUTSIDE_ROOT, operation_name, storage_path)
628
+ return ""
613
629
  else:
614
630
  return absolute_path.relative_to(root_path).as_posix()
615
631
 
@@ -639,10 +655,6 @@ class CommonFileStorage(IFileStorage):
639
655
 
640
656
  return err
641
657
 
642
- _ILLEGAL_PATH_CHARS_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
643
- _ILLEGAL_PATH_CHARS_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")
644
- _ILLEGAL_PATH_CHARS = _ILLEGAL_PATH_CHARS_WINDOWS if _util.is_windows() else _ILLEGAL_PATH_CHARS_POSIX
645
-
646
658
  class ExplicitError(enum.Enum):
647
659
 
648
660
  # Validation failures
@@ -15,7 +15,9 @@
15
15
  import inspect
16
16
  import logging
17
17
  import re
18
+ import types
18
19
  import typing as tp
20
+ import pathlib
19
21
 
20
22
  import tracdap.rt.metadata as meta
21
23
  import tracdap.rt.exceptions as ex
@@ -25,6 +27,11 @@ import tracdap.rt._impl.util as util
25
27
  from tracdap.rt.api.hook import _Named # noqa
26
28
 
27
29
 
30
+ def require_package(module_name: str, module_obj: types.ModuleType):
31
+ if module_obj is None:
32
+ raise ex.ERuntimeValidation(f"Optional package [{module_name}] is not installed")
33
+
34
+
28
35
  def validate_signature(method: tp.Callable, *args, **kwargs):
29
36
  _TypeValidator.validate_signature(method, *args, **kwargs)
30
37
 
@@ -62,15 +69,19 @@ class _TypeValidator:
62
69
  signature = inspect.signature(method)
63
70
  cls.__method_cache[method.__name__] = signature
64
71
 
72
+ hints = tp.get_type_hints(method)
73
+
65
74
  positional_index = 0
66
75
 
67
76
  for param_name, param in signature.parameters.items():
68
77
 
78
+ param_type = hints.get(param_name)
79
+
69
80
  values = cls._select_arg(method.__name__, param, positional_index, *args, **kwargs)
70
81
  positional_index += len(values)
71
82
 
72
83
  for value in values:
73
- cls._validate_arg(method.__name__, param, value)
84
+ cls._validate_arg(method.__name__, param_name, param_type, value)
74
85
 
75
86
  @classmethod
76
87
  def validate_return_type(cls, method: tp.Callable, value: tp.Any):
@@ -146,14 +157,18 @@ class _TypeValidator:
146
157
  raise ex.EUnexpected("Invalid method signature in runtime API (this is a bug)")
147
158
 
148
159
  @classmethod
149
- def _validate_arg(cls, method_name: str, parameter: inspect.Parameter, value: tp.Any):
160
+ def _validate_arg(cls, method_name: str, param_name: str, param_type: tp.Type, value: tp.Any):
150
161
 
151
- if not cls._validate_type(parameter.annotation, value):
162
+ if not cls._validate_type(param_type, value):
152
163
 
153
- expected_type = cls._type_name(parameter.annotation)
164
+ expected_type = cls._type_name(param_type)
154
165
  actual_type = cls._type_name(type(value)) if value is not None else str(None)
155
166
 
156
- err = f"Invalid API call [{method_name}()]: Wrong type for [{parameter.name}]" \
167
+ if expected_type == actual_type:
168
+ expected_type = cls._type_name(param_type, qualified=True)
169
+ actual_type = cls._type_name(type(value), qualified=True)
170
+
171
+ err = f"Invalid API call [{method_name}()]: Wrong type for [{param_name}]" \
157
172
  + f" (expected [{expected_type}], got [{actual_type}])"
158
173
 
159
174
  cls._log.error(err)
@@ -210,7 +225,7 @@ class _TypeValidator:
210
225
  return isinstance(value, expected_type)
211
226
 
212
227
  @classmethod
213
- def _type_name(cls, type_var: tp.Type) -> str:
228
+ def _type_name(cls, type_var: tp.Type, qualified: bool = False) -> str:
214
229
 
215
230
  if isinstance(type_var, cls.__generic_metaclass):
216
231
 
@@ -230,7 +245,10 @@ class _TypeValidator:
230
245
 
231
246
  raise ex.ETracInternal(f"Validation of [{origin.__name__}] generic parameters is not supported yet")
232
247
 
233
- return type_var.__name__
248
+ if qualified:
249
+ return f"{type_var.__module__}.{type_var.__name__}"
250
+ else:
251
+ return type_var.__name__
234
252
 
235
253
 
236
254
  class StaticValidator:
@@ -458,3 +476,54 @@ class StaticValidator:
458
476
  def _fail(cls, message: str):
459
477
  cls._log.error(message)
460
478
  raise ex.EModelValidation(message)
479
+
480
+
481
+ class StorageValidator:
482
+
483
+ __ILLEGAL_PATH_CHARS_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
484
+ __ILLEGAL_PATH_CHARS_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")
485
+ __ILLEGAL_PATH_CHARS = __ILLEGAL_PATH_CHARS_WINDOWS if util.is_windows() else __ILLEGAL_PATH_CHARS_POSIX
486
+
487
+ @classmethod
488
+ def storage_path_is_empty(cls, storage_path: str):
489
+
490
+ return storage_path is None or len(storage_path.strip()) == 0
491
+
492
+ @classmethod
493
+ def storage_path_invalid(cls, storage_path: str):
494
+
495
+ if cls.__ILLEGAL_PATH_CHARS.match(storage_path):
496
+ return True
497
+
498
+ try:
499
+ # Make sure the path can be interpreted as a path
500
+ pathlib.Path(storage_path)
501
+ return False
502
+ except ValueError:
503
+ return True
504
+
505
+ @classmethod
506
+ def storage_path_not_relative(cls, storage_path: str):
507
+
508
+ relative_path = pathlib.Path(storage_path)
509
+ return relative_path.is_absolute()
510
+
511
+ @classmethod
512
+ def storage_path_outside_root(cls, storage_path: str):
513
+
514
+ # is_relative_to only supported in Python 3.9+, we need to support 3.8
515
+
516
+ root_path = pathlib.Path("C:\\root") if util.is_windows() else pathlib.Path("/root")
517
+ relative_path = pathlib.Path(storage_path)
518
+ absolute_path = root_path.joinpath(relative_path).resolve(False)
519
+
520
+ return root_path != absolute_path and root_path not in absolute_path.parents
521
+
522
+ @classmethod
523
+ def storage_path_is_root(cls, storage_path: str):
524
+
525
+ root_path = pathlib.Path("C:\\root") if util.is_windows() else pathlib.Path("/root")
526
+ relative_path = pathlib.Path(storage_path)
527
+ absolute_path = root_path.joinpath(relative_path).resolve(False)
528
+
529
+ return root_path == absolute_path
@@ -38,7 +38,7 @@ class GitRepository(IModelRepository):
38
38
 
39
39
  REPO_URL_KEY = "repoUrl"
40
40
  NATIVE_GIT_KEY = "nativeGit"
41
- NATIVE_GIT_DEFAULT = True
41
+ NATIVE_GIT_DEFAULT = False
42
42
 
43
43
  GIT_TIMEOUT_SECONDS = 30
44
44
 
tracdap/rt/_version.py CHANGED
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.6.4"
15
+ __version__ = "0.6.5"
@@ -0,0 +1,220 @@
1
+ # Copyright 2024 Accenture Global Solutions Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import abc as _abc
16
+ import dataclasses as _dc
17
+ import datetime as _dt
18
+ import enum as _enum
19
+ import typing as _tp
20
+
21
+ from tracdap.rt.api import *
22
+ from .hook import _StaticApiHook
23
+
24
+
25
+ _DATA_FRAMEWORK = _tp.TypeVar('_DATA_FRAMEWORK')
26
+
27
+
28
+ class _DataFramework(_tp.Generic[_DATA_FRAMEWORK]):
29
+
30
+ PANDAS: "_DataFramework"
31
+ POLARS: "_DataFramework"
32
+
33
+ def __init__(self, framework_name, framework_type: _DATA_FRAMEWORK):
34
+ self.__framework_name = framework_name
35
+ self.__framework_type = framework_type
36
+
37
+ def __str__(self):
38
+ return self.__framework_name
39
+
40
+
41
+ if _tp.TYPE_CHECKING:
42
+
43
+ if pandas:
44
+ _DataFramework.PANDAS = _DataFramework('pandas', pandas.DataFrame)
45
+ """The original Python dataframe library, most widely used"""
46
+ else:
47
+ _DataFramework.PANDAS = _DataFramework('pandas', None)
48
+ """Pandas data framework is not installed"""
49
+
50
+ if polars:
51
+ _DataFramework.POLARS = _DataFramework('polars', polars.DataFrame)
52
+ """A modern, fast and simple alternative to Pandas"""
53
+ else:
54
+ _DataFramework.POLARS = _DataFramework('polars', None)
55
+ """Polars data framework is not installed"""
56
+
57
+ else:
58
+
59
+ _DataFramework.PANDAS = _DataFramework('pandas', None)
60
+ _DataFramework.POLARS = _DataFramework('polars', None)
61
+
62
+ PANDAS = _DataFramework.PANDAS
63
+ POLARS = _DataFramework.POLARS
64
+
65
+
66
+ class TracContext(TracContext):
67
+
68
+ @_abc.abstractmethod
69
+ def get_table(self, dataset_name: str, framework: _DataFramework[_DATA_FRAMEWORK]) -> _DATA_FRAMEWORK:
70
+
71
+ pass
72
+
73
+ @_abc.abstractmethod
74
+ def put_table(self, dataset_name: str, dataset: _DATA_FRAMEWORK):
75
+
76
+ pass
77
+
78
+
79
+ def init_static():
80
+ import tracdap.rt._impl.static_api as _static_impl # noqa
81
+ _static_impl.StaticApiImpl.register_impl()
82
+
83
+
84
+ def infer_schema(dataset: _tp.Any) -> SchemaDefinition:
85
+ sa = _StaticApiHook.get_instance()
86
+ return sa.infer_schema(dataset)
87
+
88
+
89
+ class FileType(_enum.Enum):
90
+
91
+ FILE = 1
92
+ DIRECTORY = 2
93
+
94
+
95
+ @_dc.dataclass
96
+ class FileStat:
97
+
98
+ """
99
+ Dataclass to represent some basic file stat info independent of the storage technology used
100
+ I.e. do not depend on Python stat_result class that refers to locally-mounted filesystems
101
+ Timestamps are held in UTC
102
+ """
103
+
104
+ file_name: str
105
+ file_type: FileType
106
+ storage_path: str
107
+ size: int
108
+
109
+ mtime: _tp.Optional[_dt.datetime] = None
110
+ atime: _tp.Optional[_dt.datetime] = None
111
+
112
+
113
+ class TracFileStorage:
114
+
115
+ @_abc.abstractmethod
116
+ def get_storage_key(self) -> str:
117
+ pass
118
+
119
+ @_abc.abstractmethod
120
+ def exists(self, storage_path: str) -> bool:
121
+ """The exists method can be used for both files and directories"""
122
+ pass
123
+
124
+ @_abc.abstractmethod
125
+ def size(self, storage_path: str) -> int:
126
+ """The size method only works on regular files, it cannot be used for directories"""
127
+ pass
128
+
129
+ @_abc.abstractmethod
130
+ def stat(self, storage_path: str) -> FileStat:
131
+ """The stat method can be used for both files and directories, so long as they exist"""
132
+ pass
133
+
134
+ @_abc.abstractmethod
135
+ def ls(self, storage_path: str, recursive: bool = False) -> _tp.List[FileStat]:
136
+ """The ls method only works on directories, it cannot be used for regular files"""
137
+ pass
138
+
139
+ @_abc.abstractmethod
140
+ def mkdir(self, storage_path: str, recursive: bool = False):
141
+ """The mkdir method will succeed silently if the directory already exists"""
142
+ pass
143
+
144
+ @_abc.abstractmethod
145
+ def rm(self, storage_path: str):
146
+ """The rm method only works on regular files, it cannot be used for directories and is not recursive"""
147
+ pass
148
+
149
+ @_abc.abstractmethod
150
+ def rmdir(self, storage_path: str):
151
+ """The rmdir method only works on directories and is always recursive"""
152
+ pass
153
+
154
+ @_abc.abstractmethod
155
+ def read_byte_stream(self, storage_path: str) -> _tp.ContextManager[_tp.BinaryIO]:
156
+ """The read_byte_stream method only works for existing files"""
157
+ pass
158
+
159
+ @_abc.abstractmethod
160
+ def write_byte_stream(self, storage_path: str) -> _tp.ContextManager[_tp.BinaryIO]:
161
+ """The write_byte_stream method will always overwrite an existing file if it exists"""
162
+ pass
163
+
164
+ def read_bytes(self, storage_path: str) -> bytes:
165
+ """The read_bytes method only works for existing files"""
166
+ with self.read_byte_stream(storage_path) as stream:
167
+ return stream.read()
168
+
169
+ def write_bytes(self, storage_path: str, data: bytes):
170
+ """The write_bytes method will always overwrite an existing file if it exists"""
171
+ with self.write_byte_stream(storage_path) as stream:
172
+ stream.write(data)
173
+
174
+
175
+
176
+ class TracDataContext(TracContext):
177
+
178
+ @_abc.abstractmethod
179
+ def get_file_storage(self, storage_key: str) -> TracFileStorage:
180
+ pass
181
+
182
+ @_abc.abstractmethod
183
+ def get_data_storage(self, storage_key: str) -> None:
184
+ pass
185
+
186
+ @_abc.abstractmethod
187
+ def add_data_import(self, dataset_key: str):
188
+ pass
189
+
190
+ @_abc.abstractmethod
191
+ def set_source_metadata(self, dataset_key: str, storage_key: str, source_info: FileStat):
192
+ pass
193
+
194
+ @_abc.abstractmethod
195
+ def set_attribute(self, dataset_key: str, attribute_name: str, value: _tp.Any):
196
+ pass
197
+
198
+ @_abc.abstractmethod
199
+ def set_schema(self, dataset_key: str, schema: SchemaDefinition):
200
+ pass
201
+
202
+
203
+ class TracDataImport(TracModel):
204
+
205
+ def define_inputs(self) -> _tp.Dict[str, ModelInputSchema]:
206
+ return dict()
207
+
208
+ @_abc.abstractmethod
209
+ def run_model(self, ctx: TracDataContext):
210
+ pass
211
+
212
+
213
+ class TracDataExport(TracModel):
214
+
215
+ def define_outputs(self) -> _tp.Dict[str, ModelOutputSchema]:
216
+ return dict()
217
+
218
+ @_abc.abstractmethod
219
+ def run_model(self, ctx: TracDataContext):
220
+ pass
tracdap/rt/api/hook.py CHANGED
@@ -118,6 +118,10 @@ class _StaticApiHook:
118
118
 
119
119
  pass
120
120
 
121
+ @_abc.abstractmethod
122
+ def infer_schema(self, dataset: _tp.Any) -> _meta.SchemaDefinition:
123
+ pass
124
+
121
125
  @_abc.abstractmethod
122
126
  def define_input_table(
123
127
  self, *fields: _tp.Union[_meta.FieldSchema, _tp.List[_meta.FieldSchema]],