tracdap-runtime 0.7.1__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (88)
  1. tracdap/rt/_impl/core/__init__.py +14 -0
  2. tracdap/rt/_impl/{config_parser.py → core/config_parser.py} +36 -19
  3. tracdap/rt/_impl/{data.py → core/data.py} +136 -32
  4. tracdap/rt/_impl/core/logging.py +195 -0
  5. tracdap/rt/_impl/{models.py → core/models.py} +15 -12
  6. tracdap/rt/_impl/{repos.py → core/repos.py} +12 -3
  7. tracdap/rt/_impl/{schemas.py → core/schemas.py} +5 -5
  8. tracdap/rt/_impl/{shim.py → core/shim.py} +5 -4
  9. tracdap/rt/_impl/{storage.py → core/storage.py} +21 -10
  10. tracdap/rt/_impl/core/struct.py +547 -0
  11. tracdap/rt/_impl/{util.py → core/util.py} +1 -111
  12. tracdap/rt/_impl/{validation.py → core/validation.py} +99 -31
  13. tracdap/rt/_impl/exec/__init__.py +14 -0
  14. tracdap/rt/{_exec → _impl/exec}/actors.py +12 -14
  15. tracdap/rt/{_exec → _impl/exec}/context.py +228 -82
  16. tracdap/rt/{_exec → _impl/exec}/dev_mode.py +163 -81
  17. tracdap/rt/{_exec → _impl/exec}/engine.py +230 -105
  18. tracdap/rt/{_exec → _impl/exec}/functions.py +191 -100
  19. tracdap/rt/{_exec → _impl/exec}/graph.py +24 -36
  20. tracdap/rt/{_exec → _impl/exec}/graph_builder.py +252 -115
  21. tracdap/rt/_impl/grpc/codec.py +1 -1
  22. tracdap/rt/{_exec → _impl/grpc}/server.py +7 -6
  23. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2.py +3 -3
  24. tracdap/rt/_impl/grpc/tracdap/api/internal/runtime_pb2_grpc.py +1 -1
  25. tracdap/rt/_impl/grpc/tracdap/metadata/common_pb2.py +1 -1
  26. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.py +40 -0
  27. tracdap/rt/_impl/grpc/tracdap/metadata/config_pb2.pyi +62 -0
  28. tracdap/rt/_impl/grpc/tracdap/metadata/custom_pb2.py +1 -1
  29. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.py +32 -20
  30. tracdap/rt/_impl/grpc/tracdap/metadata/data_pb2.pyi +48 -2
  31. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.py +4 -2
  32. tracdap/rt/_impl/grpc/tracdap/metadata/file_pb2.pyi +8 -0
  33. tracdap/rt/_impl/grpc/tracdap/metadata/flow_pb2.py +1 -1
  34. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.py +65 -63
  35. tracdap/rt/_impl/grpc/tracdap/metadata/job_pb2.pyi +16 -2
  36. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.py +28 -26
  37. tracdap/rt/_impl/grpc/tracdap/metadata/model_pb2.pyi +14 -4
  38. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.py +4 -4
  39. tracdap/rt/_impl/grpc/tracdap/metadata/object_id_pb2.pyi +6 -0
  40. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.py +9 -7
  41. tracdap/rt/_impl/grpc/tracdap/metadata/object_pb2.pyi +12 -4
  42. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.py +18 -5
  43. tracdap/rt/_impl/grpc/tracdap/metadata/resource_pb2.pyi +42 -2
  44. tracdap/rt/_impl/grpc/tracdap/metadata/search_pb2.py +1 -1
  45. tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.py → storage_pb2.py} +4 -4
  46. tracdap/rt/_impl/grpc/tracdap/metadata/tag_pb2.py +1 -1
  47. tracdap/rt/_impl/grpc/tracdap/metadata/tag_update_pb2.py +1 -1
  48. tracdap/rt/_impl/grpc/tracdap/metadata/type_pb2.py +1 -1
  49. tracdap/rt/{_exec → _impl}/runtime.py +32 -18
  50. tracdap/rt/_impl/static_api.py +65 -37
  51. tracdap/rt/_plugins/format_csv.py +1 -1
  52. tracdap/rt/_plugins/repo_git.py +56 -11
  53. tracdap/rt/_plugins/storage_sql.py +1 -1
  54. tracdap/rt/_version.py +1 -1
  55. tracdap/rt/api/__init__.py +5 -24
  56. tracdap/rt/api/constants.py +57 -0
  57. tracdap/rt/api/experimental.py +32 -0
  58. tracdap/rt/api/hook.py +26 -7
  59. tracdap/rt/api/model_api.py +16 -0
  60. tracdap/rt/api/static_api.py +265 -127
  61. tracdap/rt/config/__init__.py +11 -11
  62. tracdap/rt/config/common.py +2 -26
  63. tracdap/rt/config/dynamic.py +28 -0
  64. tracdap/rt/config/platform.py +17 -31
  65. tracdap/rt/config/runtime.py +2 -0
  66. tracdap/rt/ext/embed.py +2 -2
  67. tracdap/rt/ext/plugins.py +3 -3
  68. tracdap/rt/launch/launch.py +12 -14
  69. tracdap/rt/metadata/__init__.py +28 -18
  70. tracdap/rt/metadata/config.py +95 -0
  71. tracdap/rt/metadata/data.py +40 -0
  72. tracdap/rt/metadata/file.py +10 -0
  73. tracdap/rt/metadata/job.py +16 -0
  74. tracdap/rt/metadata/model.py +12 -2
  75. tracdap/rt/metadata/object.py +9 -1
  76. tracdap/rt/metadata/object_id.py +6 -0
  77. tracdap/rt/metadata/resource.py +41 -1
  78. {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/METADATA +23 -17
  79. tracdap_runtime-0.8.0.dist-info/RECORD +129 -0
  80. {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/WHEEL +1 -1
  81. tracdap/rt/_exec/__init__.py +0 -0
  82. tracdap_runtime-0.7.1.dist-info/RECORD +0 -121
  83. /tracdap/rt/_impl/{guard_rails.py → core/guard_rails.py} +0 -0
  84. /tracdap/rt/_impl/{type_system.py → core/type_system.py} +0 -0
  85. /tracdap/rt/_impl/grpc/tracdap/metadata/{stoarge_pb2.pyi → storage_pb2.pyi} +0 -0
  86. /tracdap/rt/metadata/{stoarge.py → storage.py} +0 -0
  87. {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info/licenses}/LICENSE +0 -0
  88. {tracdap_runtime-0.7.1.dist-info → tracdap_runtime-0.8.0.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,9 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ import contextlib
  import copy
+ import io
  import logging
  import pathlib
  import typing as tp
@@ -24,11 +26,13 @@ import tracdap.rt.api as _api
  import tracdap.rt.api.experimental as _eapi
  import tracdap.rt.metadata as _meta
  import tracdap.rt.exceptions as _ex
- import tracdap.rt._impl.type_system as _types # noqa
- import tracdap.rt._impl.data as _data # noqa
- import tracdap.rt._impl.storage as _storage # noqa
- import tracdap.rt._impl.util as _util # noqa
- import tracdap.rt._impl.validation as _val # noqa
+ import tracdap.rt._impl.core.data as _data
+ import tracdap.rt._impl.core.logging as _logging
+ import tracdap.rt._impl.core.storage as _storage
+ import tracdap.rt._impl.core.struct as _struct
+ import tracdap.rt._impl.core.type_system as _types
+ import tracdap.rt._impl.core.util as _util
+ import tracdap.rt._impl.core.validation as _val


  class TracContextImpl(_api.TracContext):
@@ -61,10 +65,15 @@ class TracContextImpl(_api.TracContext):
  model_class: _api.TracModel.__class__,
  local_ctx: tp.Dict[str, tp.Any],
  dynamic_outputs: tp.List[str] = None,
- checkout_directory: pathlib.Path = None):
+ checkout_directory: pathlib.Path = None,
+ log_provider: _logging.LogProvider = None):

- self.__ctx_log = _util.logger_for_object(self)
- self.__model_log = _util.logger_for_class(model_class)
+ # If no log provider is supplied, use the default (system logs only)
+ if log_provider is None:
+ log_provider = _logging.LogProvider()
+
+ self.__ctx_log = log_provider.logger_for_object(self)
+ self.__model_log = log_provider.logger_for_class(model_class)

  self.__model_def = model_def
  self.__model_class = model_class
@@ -82,9 +91,9 @@ class TracContextImpl(_api.TracContext):

  _val.validate_signature(self.get_parameter, parameter_name)

- self.__val.check_param_valid_identifier(parameter_name)
- self.__val.check_param_defined_in_model(parameter_name)
- self.__val.check_param_available_in_context(parameter_name)
+ self.__val.check_item_valid_identifier(parameter_name, TracContextValidator.PARAMETER)
+ self.__val.check_item_defined_in_model(parameter_name, TracContextValidator.PARAMETER)
+ self.__val.check_item_available_in_context(parameter_name, TracContextValidator.PARAMETER)

  value: _meta.Value = self.__local_ctx.get(parameter_name)

@@ -96,8 +105,8 @@ class TracContextImpl(_api.TracContext):

  _val.validate_signature(self.has_dataset, dataset_name)

- self.__val.check_dataset_valid_identifier(dataset_name)
- self.__val.check_dataset_defined_in_model(dataset_name)
+ self.__val.check_item_valid_identifier(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_defined_in_model(dataset_name, TracContextValidator.DATASET)

  data_view: _data.DataView = self.__local_ctx.get(dataset_name)

@@ -105,6 +114,7 @@ class TracContextImpl(_api.TracContext):
  return False

  self.__val.check_context_object_type(dataset_name, data_view, _data.DataView)
+ self.__val.check_context_data_view_type(dataset_name, data_view, _meta.ObjectType.DATA)

  return not data_view.is_empty()

@@ -112,9 +122,9 @@ class TracContextImpl(_api.TracContext):

  _val.validate_signature(self.get_schema, dataset_name)

- self.__val.check_dataset_valid_identifier(dataset_name)
- self.__val.check_dataset_defined_in_model(dataset_name)
- self.__val.check_dataset_available_in_context(dataset_name)
+ self.__val.check_item_valid_identifier(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_defined_in_model(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_available_in_context(dataset_name, TracContextValidator.DATASET)

  static_schema = self.__get_static_schema(self.__model_def, dataset_name)
  data_view: _data.DataView = self.__local_ctx.get(dataset_name)
@@ -123,6 +133,7 @@ class TracContextImpl(_api.TracContext):
  # This ensures errors are always reported and is consistent with get_pandas_table()

  self.__val.check_context_object_type(dataset_name, data_view, _data.DataView)
+ self.__val.check_context_data_view_type(dataset_name, data_view, _meta.ObjectType.DATA)
  self.__val.check_dataset_schema_defined(dataset_name, data_view)

  # If a static schema exists, that takes priority
@@ -138,9 +149,9 @@ class TracContextImpl(_api.TracContext):
  _val.validate_signature(self.get_table, dataset_name, framework)
  _val.require_package(framework.protocol_name, framework.api_type)

- self.__val.check_dataset_valid_identifier(dataset_name)
- self.__val.check_dataset_defined_in_model(dataset_name)
- self.__val.check_dataset_available_in_context(dataset_name)
+ self.__val.check_item_valid_identifier(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_defined_in_model(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_available_in_context(dataset_name, TracContextValidator.DATASET)
  self.__val.check_data_framework_args(framework, framework_args)

  static_schema = self.__get_static_schema(self.__model_def, dataset_name)
@@ -150,6 +161,7 @@ class TracContextImpl(_api.TracContext):
  converter = _data.DataConverter.for_framework(framework, **framework_args)

  self.__val.check_context_object_type(dataset_name, data_view, _data.DataView)
+ self.__val.check_context_data_view_type(dataset_name, data_view, _meta.ObjectType.DATA)
  self.__val.check_dataset_schema_defined(dataset_name, data_view)
  self.__val.check_dataset_part_present(dataset_name, data_view, part_key)

@@ -174,6 +186,45 @@ class TracContextImpl(_api.TracContext):

  return self.get_table(dataset_name, _eapi.POLARS)

+ def get_struct(self, struct_name: str, python_class: type[_eapi.STRUCT_TYPE] = None) -> _eapi.STRUCT_TYPE:
+
+ _val.validate_signature(self.get_struct, struct_name, python_class)
+
+ self.__val.check_item_valid_identifier(struct_name, TracContextValidator.DATASET)
+ self.__val.check_item_defined_in_model(struct_name, TracContextValidator.DATASET)
+ self.__val.check_item_available_in_context(struct_name, TracContextValidator.DATASET)
+
+ data_view: _data.DataView = self.__local_ctx.get(struct_name)
+ part_key = _data.DataPartKey.for_root()
+
+ self.__val.check_context_object_type(struct_name, data_view, _data.DataView)
+ self.__val.check_context_data_view_type(struct_name, data_view, _meta.ObjectType.DATA)
+ self.__val.check_dataset_schema_defined(struct_name, data_view)
+
+ struct_data: dict = data_view.parts[part_key][0].content
+ return _struct.StructProcessor.parse_struct(struct_data, None, python_class)
+
+ def get_file(self, file_name: str) -> bytes:
+
+ _val.validate_signature(self.get_file, file_name)
+
+ self.__val.check_item_valid_identifier(file_name, TracContextValidator.FILE)
+ self.__val.check_item_defined_in_model(file_name, TracContextValidator.FILE)
+ self.__val.check_item_available_in_context(file_name, TracContextValidator.FILE)
+
+ file_view: _data.DataView = self.__local_ctx.get(file_name)
+
+ self.__val.check_context_object_type(file_name, file_view, _data.DataView)
+ self.__val.check_context_data_view_type(file_name, file_view, _meta.ObjectType.FILE)
+ self.__val.check_file_content_present(file_name, file_view)
+
+ return file_view.file_item.content
+
+ def get_file_stream(self, file_name: str) -> tp.ContextManager[tp.BinaryIO]:
+
+ buffer = self.get_file(file_name)
+ return contextlib.closing(io.BytesIO(buffer))
+
  def put_schema(self, dataset_name: str, schema: _meta.SchemaDefinition):

  _val.validate_signature(self.get_schema, dataset_name, schema)
@@ -182,7 +233,7 @@ class TracContextImpl(_api.TracContext):
  # If field ordering is not assigned by the model, assign it here (model code will not see the numbers)
  schema_copy = self.__assign_field_order(copy.deepcopy(schema))

- self.__val.check_dataset_valid_identifier(dataset_name)
+ self.__val.check_item_valid_identifier(dataset_name, TracContextValidator.DATASET)
  self.__val.check_dataset_is_dynamic_output(dataset_name)
  self.__val.check_provided_schema_is_valid(dataset_name, schema_copy)

@@ -197,6 +248,7 @@ class TracContextImpl(_api.TracContext):

  # If there is a prior view it must contain nothing and will be replaced
  self.__val.check_context_object_type(dataset_name, data_view, _data.DataView)
+ self.__val.check_context_data_view_type(dataset_name, data_view, _meta.ObjectType.DATA)
  self.__val.check_dataset_schema_not_defined(dataset_name, data_view)
  self.__val.check_dataset_is_empty(dataset_name, data_view)

@@ -216,8 +268,8 @@ class TracContextImpl(_api.TracContext):

  _val.require_package(framework.protocol_name, framework.api_type)

- self.__val.check_dataset_valid_identifier(dataset_name)
- self.__val.check_dataset_is_model_output(dataset_name)
+ self.__val.check_item_valid_identifier(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_is_model_output(dataset_name, TracContextValidator.DATASET)
  self.__val.check_provided_dataset_type(dataset, framework.api_type)
  self.__val.check_data_framework_args(framework, framework_args)

@@ -234,19 +286,22 @@ class TracContextImpl(_api.TracContext):
  data_view = _data.DataView.create_empty()

  self.__val.check_context_object_type(dataset_name, data_view, _data.DataView)
+ self.__val.check_context_data_view_type(dataset_name, data_view, _meta.ObjectType.DATA)
  self.__val.check_dataset_schema_defined(dataset_name, data_view)
  self.__val.check_dataset_part_not_present(dataset_name, data_view, part_key)

  # Prefer static schemas for data conformance

  if static_schema is not None:
- schema = _data.DataMapping.trac_to_arrow_schema(static_schema)
+ trac_schema = static_schema
+ native_schema = _data.DataMapping.trac_to_arrow_schema(static_schema)
  else:
- schema = data_view.arrow_schema
+ trac_schema = _data.DataMapping.arrow_to_trac_schema(data_view.arrow_schema)
+ native_schema = data_view.arrow_schema

  # Data conformance is applied automatically inside the converter, if schema != None
- table = converter.to_internal(dataset, schema)
- item = _data.DataItem(schema, table)
+ table = converter.to_internal(dataset, native_schema)
+ item = _data.DataItem.for_table(table, native_schema, trac_schema)

  updated_view = _data.DataMapping.add_item_to_view(data_view, part_key, item)

@@ -260,6 +315,73 @@ class TracContextImpl(_api.TracContext):

  self.put_table(dataset_name, dataset, _eapi.POLARS)

+ def put_struct(self, struct_name: str, struct: _eapi.STRUCT_TYPE):
+
+ _val.validate_signature(self.put_struct, struct_name, struct)
+
+ self.__val.check_item_valid_identifier(struct_name, TracContextValidator.DATASET)
+ self.__val.check_item_is_model_output(struct_name, TracContextValidator.DATASET)
+
+ static_schema = self.__get_static_schema(self.__model_def, struct_name)
+ data_view = self.__local_ctx.get(struct_name)
+ part_key = _data.DataPartKey.for_root()
+
+ if data_view is None:
+ if static_schema is not None:
+ data_view = _data.DataView.for_trac_schema(static_schema)
+ else:
+ data_view = _data.DataView.create_empty()
+
+ self.__val.check_context_object_type(struct_name, data_view, _data.DataView)
+ self.__val.check_context_data_view_type(struct_name, data_view, _meta.ObjectType.DATA)
+ self.__val.check_dataset_schema_defined(struct_name, data_view)
+ self.__val.check_dataset_part_not_present(struct_name, data_view, part_key)
+
+ data_item = _data.DataItem.for_struct(struct)
+ updated_view = _data.DataMapping.add_item_to_view(data_view, part_key, data_item)
+
+ self.__local_ctx[struct_name] = updated_view
+
+ def put_file(self, file_name: str, file_content: tp.Union[bytes, bytearray]):
+
+ _val.validate_signature(self.put_file, file_name, file_content)
+
+ self.__val.check_item_valid_identifier(file_name, TracContextValidator.FILE)
+ self.__val.check_item_is_model_output(file_name, TracContextValidator.FILE)
+
+ file_view: _data.DataView = self.__local_ctx.get(file_name)
+
+ if file_view is None:
+ file_view = _data.DataView.create_empty(_meta.ObjectType.FILE)
+
+ self.__val.check_context_object_type(file_name, file_view, _data.DataView)
+ self.__val.check_context_data_view_type(file_name, file_view, _meta.ObjectType.FILE)
+ self.__val.check_file_content_not_present(file_name, file_view)
+
+ if isinstance(file_content, bytearray):
+ file_content = bytes(bytearray)
+
+ file_item = _data.DataItem.for_file_content(file_content)
+ self.__local_ctx[file_name] = file_view.with_file_item(file_item)
+
+ def put_file_stream(self, file_name: str) -> tp.ContextManager[tp.BinaryIO]:
+
+ _val.validate_signature(self.put_file_stream, file_name)
+
+ self.__val.check_item_valid_identifier(file_name, TracContextValidator.FILE)
+ self.__val.check_item_is_model_output(file_name, TracContextValidator.FILE)
+
+ @contextlib.contextmanager
+ def memory_stream(stream: io.BytesIO):
+ try:
+ yield stream
+ buffer = stream.getbuffer().tobytes()
+ self.put_file(file_name, buffer)
+ finally:
+ stream.close()
+
+ return memory_stream(io.BytesIO())
+
  def log(self) -> logging.Logger:

  _val.validate_signature(self.log)
@@ -300,9 +422,9 @@ class TracDataContextImpl(TracContextImpl, _eapi.TracDataContext):
  self, model_def: _meta.ModelDefinition, model_class: _api.TracModel.__class__,
  local_ctx: tp.Dict[str, tp.Any], dynamic_outputs: tp.List[str],
  storage_map: tp.Dict[str, tp.Union[_eapi.TracFileStorage, _eapi.TracDataStorage]],
- checkout_directory: pathlib.Path = None):
+ checkout_directory: pathlib.Path = None, log_provider: _logging.LogProvider = None):

- super().__init__(model_def, model_class, local_ctx, dynamic_outputs, checkout_directory)
+ super().__init__(model_def, model_class, local_ctx, dynamic_outputs, checkout_directory, log_provider)

  self.__model_def = model_def
  self.__local_ctx = local_ctx
@@ -310,7 +432,7 @@ class TracDataContextImpl(TracContextImpl, _eapi.TracDataContext):
  self.__storage_map = storage_map
  self.__checkout_directory = checkout_directory

- self.__val = self._TracContextImpl__val # noqa
+ self.__val: TracContextValidator = self._TracContextImpl__val # noqa

  def get_file_storage(self, storage_key: str) -> _eapi.TracFileStorage:

@@ -348,9 +470,9 @@ class TracDataContextImpl(TracContextImpl, _eapi.TracDataContext):

  _val.validate_signature(self.add_data_import, dataset_name)

- self.__val.check_dataset_valid_identifier(dataset_name)
- self.__val.check_dataset_not_defined_in_model(dataset_name)
- self.__val.check_dataset_not_available_in_context(dataset_name)
+ self.__val.check_item_valid_identifier(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_not_defined_in_model(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_not_available_in_context(dataset_name, TracContextValidator.DATASET)

  self.__local_ctx[dataset_name] = _data.DataView.create_empty()
  self.__dynamic_outputs.append(dataset_name)
@@ -359,8 +481,8 @@ class TracDataContextImpl(TracContextImpl, _eapi.TracDataContext):

  _val.validate_signature(self.set_source_metadata, dataset_name, storage_key, source_info)

- self.__val.check_dataset_valid_identifier(dataset_name)
- self.__val.check_dataset_available_in_context(dataset_name)
+ self.__val.check_item_valid_identifier(dataset_name, TracContextValidator.DATASET)
+ self.__val.check_item_available_in_context(dataset_name, TracContextValidator.DATASET)
  self.__val.check_storage_valid_identifier(storage_key)
  self.__val.check_storage_available(self.__storage_map, storage_key)

@@ -368,11 +490,11 @@ class TracDataContextImpl(TracContextImpl, _eapi.TracDataContext):

  if isinstance(storage, _eapi.TracFileStorage):
  if not isinstance(source_info, _eapi.FileStat):
- self.__val.report_public_error(f"Expected storage_info to be a FileStat, [{storage_key}] refers to file storage")
+ self.__val.report_public_error(_ex.ERuntimeValidation(f"Expected storage_info to be a FileStat, [{storage_key}] refers to file storage"))

  if isinstance(storage, _eapi.TracDataStorage):
  if not isinstance(source_info, str):
- self.__val.report_public_error(f"Expected storage_info to be a table name, [{storage_key}] refers to dadta storage")
+ self.__val.report_public_error(_ex.ERuntimeValidation(f"Expected storage_info to be a table name, [{storage_key}] refers to dadta storage"))

  pass # Not implemented yet, only required when imports are sent back to the platform

@@ -392,7 +514,9 @@ class TracDataContextImpl(TracContextImpl, _eapi.TracDataContext):

  class TracFileStorageImpl(_eapi.TracFileStorage):

- def __init__(self, storage_key: str, storage_impl: _storage.IFileStorage, write_access: bool, checkout_directory):
+ def __init__(
+ self, storage_key: str, storage_impl: _storage.IFileStorage,
+ write_access: bool, checkout_directory, log_provider: _logging.LogProvider):

  self.__storage_key = storage_key

@@ -413,7 +537,11 @@ class TracFileStorageImpl(_eapi.TracFileStorage):
  self.__rmdir = None
  self.__write_byte_stream = None

- self.__log = _util.logger_for_object(self)
+ # If no log provider is supplied, use the default (system logs only)
+ if log_provider is None:
+ log_provider = _logging.LogProvider()
+
+ self.__log = log_provider.logger_for_object(self)
  self.__val = TracStorageValidator(self.__log, checkout_directory, self.__storage_key)

  def get_storage_key(self) -> str:
@@ -534,7 +662,7 @@ class TracDataStorageImpl(_eapi.TracDataStorage[_eapi.DATA_API]):
  def __init__(
  self, storage_key: str, storage_impl: _storage.IDataStorageBase[_data.T_INTERNAL_DATA, _data.T_INTERNAL_SCHEMA],
  data_converter: _data.DataConverter[_eapi.DATA_API, _data.T_INTERNAL_DATA, _data.T_INTERNAL_SCHEMA],
- write_access: bool, checkout_directory):
+ write_access: bool, checkout_directory, log_provider: _logging.LogProvider):

  self.__storage_key = storage_key
  self.__converter = data_converter
@@ -551,7 +679,11 @@ class TracDataStorageImpl(_eapi.TracDataStorage[_eapi.DATA_API]):
  self.__create_table = None
  self.__write_table = None

- self.__log = _util.logger_for_object(self)
+ # If no log provider is supplied, use the default (system logs only)
+ if log_provider is None:
+ log_provider = _logging.LogProvider()
+
+ self.__log = log_provider.logger_for_object(self)
  self.__val = TracStorageValidator(self.__log, checkout_directory, self.__storage_key)

  def has_table(self, table_name: str) -> bool:
@@ -684,6 +816,10 @@ class TracContextErrorReporter:

  class TracContextValidator(TracContextErrorReporter):

+ PARAMETER = "Parameter"
+ DATASET = "Dataset"
+ FILE = "File"
+
  def __init__(
  self, log: logging.Logger,
  model_def: _meta.ModelDefinition,
@@ -697,49 +833,45 @@ class TracContextValidator(TracContextErrorReporter):
  self.__local_ctx = local_ctx
  self.__dynamic_outputs = dynamic_outputs

- def check_param_valid_identifier(self, param_name: str):
+ def check_item_valid_identifier(self, item_name: str, item_type: str):

- if param_name is None:
- self._report_error(f"Parameter name is null")
+ if item_name is None:
+ self._report_error(f"{item_type} name is null")

- if not self._VALID_IDENTIFIER.match(param_name):
- self._report_error(f"Parameter name {param_name} is not a valid identifier")
+ if not self._VALID_IDENTIFIER.match(item_name):
+ self._report_error(f"{item_type} name {item_name} is not a valid identifier")

- def check_param_defined_in_model(self, param_name: str):
+ def check_item_defined_in_model(self, item_name: str, item_type: str):

- if param_name not in self.__model_def.parameters:
- self._report_error(f"Parameter {param_name} is not defined in the model")
-
- def check_param_available_in_context(self, param_name: str):
-
- if param_name not in self.__local_ctx:
- self._report_error(f"Parameter {param_name} is not available in the current context")
-
- def check_dataset_valid_identifier(self, dataset_name: str):
+ if item_type == self.PARAMETER:
+ if item_name not in self.__model_def.parameters:
+ self._report_error(f"{item_type} {item_name} is not defined in the model")
+ else:
+ if item_name not in self.__model_def.inputs and item_name not in self.__model_def.outputs:
+ self._report_error(f"{item_type} {item_name} is not defined in the model")

- if dataset_name is None:
- self._report_error(f"Dataset name is null")
+ def check_item_not_defined_in_model(self, item_name: str, item_type: str):

- if not self._VALID_IDENTIFIER.match(dataset_name):
- self._report_error(f"Dataset name {dataset_name} is not a valid identifier")
+ if item_name in self.__model_def.inputs or item_name in self.__model_def.outputs:
+ self._report_error(f"{item_type} {item_name} is already defined in the model")

- def check_dataset_not_defined_in_model(self, dataset_name: str):
+ if item_name in self.__model_def.parameters:
+ self._report_error(f"{item_name} name {item_name} is already in use as a model parameter")

- if dataset_name in self.__model_def.inputs or dataset_name in self.__model_def.outputs:
- self._report_error(f"Dataset {dataset_name} is already defined in the model")
+ def check_item_is_model_output(self, item_name: str, item_type: str):

- if dataset_name in self.__model_def.parameters:
- self._report_error(f"Dataset name {dataset_name} is already in use as a model parameter")
+ if item_name not in self.__model_def.outputs and item_name not in self.__dynamic_outputs:
+ self._report_error(f"{item_type} {item_name} is not defined as a model output")

- def check_dataset_defined_in_model(self, dataset_name: str):
+ def check_item_available_in_context(self, item_name: str, item_type: str):

- if dataset_name not in self.__model_def.inputs and dataset_name not in self.__model_def.outputs:
- self._report_error(f"Dataset {dataset_name} is not defined in the model")
+ if item_name not in self.__local_ctx:
+ self._report_error(f"{item_type} {item_name} is not available in the current context")

- def check_dataset_is_model_output(self, dataset_name: str):
+ def check_item_not_available_in_context(self, item_name: str, item_type: str):

- if dataset_name not in self.__model_def.outputs and dataset_name not in self.__dynamic_outputs:
- self._report_error(f"Dataset {dataset_name} is not defined as a model output")
+ if item_name in self.__local_ctx:
+ self._report_error(f"{item_type} {item_name} already exists in the current context")

  def check_dataset_is_dynamic_output(self, dataset_name: str):

@@ -752,21 +884,17 @@ class TracContextValidator(TracContextErrorReporter):
  if model_output and not model_output.dynamic:
  self._report_error(f"Model output {dataset_name} is not a dynamic output")

- def check_dataset_available_in_context(self, item_name: str):
-
- if item_name not in self.__local_ctx:
- self._report_error(f"Dataset {item_name} is not available in the current context")
-
- def check_dataset_not_available_in_context(self, item_name: str):
-
- if item_name in self.__local_ctx:
- self._report_error(f"Dataset {item_name} already exists in the current context")
-
  def check_dataset_schema_defined(self, dataset_name: str, data_view: _data.DataView):

  schema = data_view.trac_schema if data_view is not None else None

- if schema is None or schema.table is None or not schema.table.fields:
+ if schema is None:
+ self._report_error(f"Schema not defined for dataset {dataset_name} in the current context")
+
+ if schema.schemaType == _meta.SchemaType.TABLE and (schema.table is None or not schema.table.fields):
+ self._report_error(f"Schema not defined for dataset {dataset_name} in the current context")
+
+ if schema.schemaType == _meta.SchemaType.STRUCT and (schema.struct is None or not schema.struct.fields):
  self._report_error(f"Schema not defined for dataset {dataset_name} in the current context")

  def check_dataset_schema_not_defined(self, dataset_name: str, data_view: _data.DataView):
@@ -834,6 +962,14 @@ class TracContextValidator(TracContextErrorReporter):
  f"The object referenced by [{item_name}] in the current context has the wrong type" +
  f" (expected {expected_type_name}, got {actual_type_name})")

+ def check_context_data_view_type(self, item_name: str, data_vew: _data.DataView, expected_type: _meta.ObjectType):
+
+ if data_vew.object_type != expected_type:
+
+ self._report_error(
+ f"The object referenced by [{item_name}] in the current context has the wrong type" +
+ f" (expected {expected_type.name}, got {data_vew.object_type.name})")
+
  def check_data_framework_args(self, framework: _eapi.DataFramework, framework_args: tp.Dict[str, tp.Any]):

  expected_args = _data.DataConverter.get_framework_args(framework)
@@ -861,6 +997,16 @@ class TracContextValidator(TracContextErrorReporter):
  f"Using [{framework}], argument [{arg_name}] has the wrong type" +
  f" (expected {expected_type_name}, got {actual_type_name})")

+ def check_file_content_present(self, file_name: str, file_view: _data.DataView):
+
+ if file_view.file_item is None or file_view.file_item.content is None:
+ self._report_error(f"File content is missing or empty for [{file_name}] in the current context")
+
+ def check_file_content_not_present(self, file_name: str, file_view: _data.DataView):
+
+ if file_view.file_item is not None and file_view.file_item.content is not None:
+ self._report_error(f"File content is already present for [{file_name}] in the current context")
+
  def check_storage_valid_identifier(self, storage_key):

  if storage_key is None:
@@ -878,7 +1024,7 @@ class TracContextValidator(TracContextErrorReporter):

  def check_storage_type(
  self, storage_map: tp.Dict, storage_key: str,
- storage_type: tp.Union[_eapi.TracFileStorage.__class__]):
+ storage_type: tp.Union[_eapi.TracFileStorage.__class__, _eapi.TracDataStorage.__class__]):

  storage_instance = storage_map.get(storage_key)