tracdap-runtime 0.5.30__py3-none-any.whl → 0.6.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. tracdap/rt/_exec/dev_mode.py +2 -1
  2. tracdap/rt/_impl/data.py +1 -28
  3. tracdap/rt/_impl/static_api.py +5 -1
  4. tracdap/rt/_impl/storage.py +586 -10
  5. tracdap/rt/_impl/util.py +24 -3
  6. tracdap/rt/_plugins/_helpers.py +26 -25
  7. tracdap/rt/_plugins/storage_aws.py +162 -76
  8. tracdap/rt/_plugins/storage_azure.py +155 -0
  9. tracdap/rt/_plugins/storage_gcp.py +183 -0
  10. tracdap/rt/_plugins/storage_local.py +249 -98
  11. tracdap/rt/_version.py +1 -1
  12. tracdap/rt/api/static_api.py +2 -1
  13. tracdap/rt/config/__init__.py +8 -13
  14. tracdap/rt/config/common.py +10 -0
  15. tracdap/rt/config/common_pb2.py +38 -31
  16. tracdap/rt/config/job_pb2.py +21 -20
  17. tracdap/rt/config/platform.py +60 -25
  18. tracdap/rt/config/platform_pb2.py +52 -45
  19. tracdap/rt/config/result_pb2.py +15 -14
  20. tracdap/rt/config/runtime.py +0 -1
  21. tracdap/rt/config/runtime_pb2.py +24 -24
  22. tracdap/rt/exceptions.py +9 -0
  23. tracdap/rt/ext/plugins.py +0 -12
  24. tracdap/rt/ext/storage.py +47 -29
  25. tracdap/rt/metadata/common_pb2.py +15 -14
  26. tracdap/rt/metadata/custom_pb2.py +9 -8
  27. tracdap/rt/metadata/data_pb2.py +31 -30
  28. tracdap/rt/metadata/file_pb2.py +9 -8
  29. tracdap/rt/metadata/flow_pb2.py +33 -32
  30. tracdap/rt/metadata/job_pb2.py +55 -54
  31. tracdap/rt/metadata/model_pb2.py +31 -30
  32. tracdap/rt/metadata/object_id_pb2.py +13 -12
  33. tracdap/rt/metadata/object_pb2.py +9 -8
  34. tracdap/rt/metadata/search_pb2.py +19 -18
  35. tracdap/rt/metadata/stoarge_pb2.py +31 -30
  36. tracdap/rt/metadata/tag_pb2.py +13 -12
  37. tracdap/rt/metadata/tag_update_pb2.py +11 -10
  38. tracdap/rt/metadata/type_pb2.py +29 -28
  39. {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/METADATA +26 -15
  40. {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/RECORD +43 -43
  41. tracdap/rt/config/gateway.py +0 -104
  42. tracdap/rt/config/gateway_pb2.py +0 -45
  43. {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/LICENSE +0 -0
  44. {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/WHEEL +0 -0
  45. {tracdap_runtime-0.5.30.dist-info → tracdap_runtime-0.6.0.dev1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,183 @@
1
+ # Copyright 2023 Accenture Global Solutions Limited
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import typing as tp
16
+ import datetime as dt
17
+
18
+ import tracdap.rt.exceptions as ex
19
+
20
+ # Import storage interfaces
21
+ import tracdap.rt.ext.plugins as plugins
22
+ from tracdap.rt.ext.storage import *
23
+
24
+ from pyarrow import fs as pa_fs
25
+
26
+ # Set of common helpers across the core plugins (do not reference rt._impl)
27
+ from . import _helpers
28
+
29
+
30
+ try:
31
+ # These dependencies are provided by the optional [gcp] feature
32
+ # For local development, pip install -r requirements_plugins.txt
33
+ import google.cloud.storage as gcs # noqa
34
+ import gcsfs # noqa
35
+ __gcp_available = True
36
+ except ImportError:
37
+ gcs = None
38
+ gcsfs = None
39
+ __gcp_available = False
40
+
41
+
42
+ class GcpStorageProvider(IStorageProvider):
43
+
44
+ BUCKET_PROPERTY = "bucket"
45
+ PREFIX_PROPERTY = "prefix"
46
+ REGION_PROPERTY = "region"
47
+ ENDPOINT_PROPERTY = "endpoint"
48
+
49
+ CREDENTIALS_PROPERTY = "credentials"
50
+ CREDENTIALS_ADC = "adc"
51
+ CREDENTIALS_ACCESS_TOKEN = "access_token"
52
+
53
+ ACCESS_TOKEN = "accessToken"
54
+ ACCESS_TOKEN_EXPIRY = "accessTokenExpiry"
55
+ ACCESS_TOKEN_EXPIRY_DEFAULT = 3600
56
+
57
+ RUNTIME_FS_PROPERTY = "runtimeFs"
58
+ RUNTIME_FS_AUTO = "auto"
59
+ RUNTIME_FS_ARROW = "arrow"
60
+ RUNTIME_FS_FSSPEC = "fsspec"
61
+ RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
62
+
63
+ ARROW_CLIENT_ARGS = {
64
+ REGION_PROPERTY: "default_bucket_location",
65
+ ENDPOINT_PROPERTY: "endpoint_override"
66
+ }
67
+
68
+ FSSPEC_CLIENT_ARGS = {
69
+ REGION_PROPERTY: "default_location",
70
+ ENDPOINT_PROPERTY: "endpoint_url"
71
+ }
72
+
73
+ try:
74
+ __arrow_available = pa_fs.GcsFileSystem is not None
75
+ except ImportError:
76
+ __arrow_available = False
77
+
78
+ def __init__(self, properties: tp.Dict[str, str]):
79
+
80
+ self._log = _helpers.logger_for_object(self)
81
+ self._properties = properties
82
+
83
+ self._runtime_fs = _helpers.get_plugin_property(
84
+ properties, self.RUNTIME_FS_PROPERTY) \
85
+ or self.RUNTIME_FS_DEFAULT
86
+
87
+ def has_arrow_native(self) -> bool:
88
+ return True
89
+
90
+ def get_arrow_native(self) -> pa_fs.SubTreeFileSystem:
91
+
92
+ if self._runtime_fs == self.RUNTIME_FS_AUTO:
93
+ gcs_fs = self.create_arrow() if self.__arrow_available else self.create_fsspec()
94
+ elif self._runtime_fs == self.RUNTIME_FS_ARROW:
95
+ gcs_fs = self.create_arrow()
96
+ elif self._runtime_fs == self.RUNTIME_FS_FSSPEC:
97
+ gcs_fs = self.create_fsspec()
98
+ else:
99
+ message = f"Requested runtime FS [{self._runtime_fs}] is not available for GCP storage"
100
+ self._log.error(message)
101
+ raise ex.EStartup(message)
102
+
103
+ bucket = _helpers.get_plugin_property(self._properties, self.BUCKET_PROPERTY)
104
+ prefix = _helpers.get_plugin_property(self._properties, self.PREFIX_PROPERTY)
105
+
106
+ if bucket is None or len(bucket.strip()) == 0:
107
+ message = f"Missing required config property [{self.BUCKET_PROPERTY}] for GCP storage"
108
+ self._log.error(message)
109
+ raise ex.EConfigParse(message)
110
+
111
+ root_path = f"{bucket}/{prefix}" if prefix else bucket
112
+
113
+ return pa_fs.SubTreeFileSystem(root_path, gcs_fs)
114
+
115
+ def create_arrow(self) -> pa_fs.FileSystem:
116
+
117
+ gcs_arrow_args = self.setup_client_args(self.ARROW_CLIENT_ARGS)
118
+
119
+ return pa_fs.GcsFileSystem(**gcs_arrow_args)
120
+
121
+ def create_fsspec(self) -> pa_fs.FileSystem:
122
+
123
+ gcs_fsspec_args = self.setup_client_args(self.FSSPEC_CLIENT_ARGS)
124
+ gcs_fsspec = gcsfs.GCSFileSystem(**gcs_fsspec_args)
125
+
126
+ return pa_fs.PyFileSystem(pa_fs.FSSpecHandler(gcs_fsspec))
127
+
128
+ def setup_client_args(self, arg_mapping: tp.Dict[str, str]) -> tp.Dict[str, tp.Any]:
129
+
130
+ client_args = dict()
131
+
132
+ region = _helpers.get_plugin_property(self._properties, self.REGION_PROPERTY)
133
+ endpoint = _helpers.get_plugin_property(self._properties, self.ENDPOINT_PROPERTY)
134
+
135
+ if region is not None:
136
+ region_key = arg_mapping[self.REGION_PROPERTY]
137
+ client_args[region_key] = region
138
+
139
+ if endpoint is not None:
140
+ endpoint_key = arg_mapping[self.ENDPOINT_PROPERTY]
141
+ client_args[endpoint_key] = endpoint
142
+
143
+ credentials = self.setup_credentials()
144
+ client_args.update(credentials)
145
+
146
+ return client_args
147
+
148
+ def setup_credentials(self):
149
+
150
+ # Only default (Google ADC) mechanism is supported
151
+ # Arrow GCP FS does also support access tokens, but ADC is probably all we ever need
152
+
153
+ mechanism = _helpers.get_plugin_property(self._properties, self.CREDENTIALS_PROPERTY)
154
+
155
+ if mechanism is None or len(mechanism) == 0 or mechanism.lower() == self.CREDENTIALS_ADC:
156
+ self._log.info(f"Using [{self.CREDENTIALS_ADC}] credentials mechanism")
157
+ return dict()
158
+
159
+ if mechanism == self.CREDENTIALS_ACCESS_TOKEN:
160
+
161
+ self._log.info(f"Using [{self.CREDENTIALS_ACCESS_TOKEN}] credentials mechanism")
162
+
163
+ access_token = _helpers.get_plugin_property(self._properties, self.ACCESS_TOKEN)
164
+ access_token_expiry = _helpers.get_plugin_property(self._properties, self.ACCESS_TOKEN_EXPIRY)
165
+
166
+ if access_token is None or len(access_token.strip()) == 0:
167
+ message = f"Missing required config property [{self.ACCESS_TOKEN}] for GCP storage"
168
+ raise ex.EConfigParse(message)
169
+
170
+ if access_token_expiry is None:
171
+ access_token_expiry = self.ACCESS_TOKEN_EXPIRY_DEFAULT
172
+
173
+ expiry_timestamp = dt.datetime.now(dt.timezone.utc) + dt.timedelta(seconds=float(access_token_expiry))
174
+
175
+ return {"access_token": access_token, "credential_token_expiration": expiry_timestamp}
176
+
177
+ message = f"Unrecognised credentials mechanism: [{mechanism}]"
178
+ self._log.error(message)
179
+ raise ex.EStartup(message)
180
+
181
+
182
+ if __gcp_available:
183
+ plugins.PluginManager.register_plugin(IStorageProvider, GcpStorageProvider, ["GCS"])
@@ -13,6 +13,9 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import datetime as dt
16
+ import os
17
+ import re
18
+ import sys
16
19
  import typing as tp
17
20
  import pathlib
18
21
 
@@ -23,27 +26,61 @@ import tracdap.rt.exceptions as ex
23
26
  import tracdap.rt.ext.plugins as plugins
24
27
  from tracdap.rt.ext.storage import *
25
28
 
29
+ import pyarrow.fs as afs
30
+
26
31
  # Set of common helpers across the core plugins (do not reference rt._impl)
27
32
  from . import _helpers
28
33
 
29
- # TODO: Remove dependencies on internal implementation details
30
- import tracdap.rt._impl.storage as _storage
31
-
32
34
 
33
- class LocalFileStorage(IFileStorage):
35
+ class LocalStorageProvider(IStorageProvider):
34
36
 
35
37
  ROOT_PATH_PROPERTY = "rootPath"
36
38
 
37
- def __init__(self, config: cfg.PluginConfig, options: dict = None):
39
+ RUNTIME_FS_PROPERTY = "runtimeFs"
40
+ RUNTIME_FS_AUTO = "auto"
41
+ RUNTIME_FS_ARROW = "arrow"
42
+ RUNTIME_FS_PYTHON = "python"
43
+ RUNTIME_FS_DEFAULT = RUNTIME_FS_AUTO
44
+
45
+ def __init__(self, properties: tp.Dict[str, str]):
38
46
 
39
47
  self._log = _helpers.logger_for_object(self)
40
- self._properties = config.properties
48
+ self._properties = properties
49
+
50
+ self._root_path = self.check_root_path(self._properties, self._log)
51
+
52
+ self._runtime_fs = _helpers.get_plugin_property(
53
+ properties, self.RUNTIME_FS_PROPERTY) \
54
+ or self.RUNTIME_FS_DEFAULT
55
+
56
+ def has_arrow_native(self) -> bool:
57
+ return self._runtime_fs in [self.RUNTIME_FS_ARROW, self.RUNTIME_FS_AUTO]
58
+
59
+ def has_file_storage(self) -> bool:
60
+ return self._runtime_fs == self.RUNTIME_FS_PYTHON
41
61
 
42
- root_path_config = _helpers.get_plugin_property(self._properties, self.ROOT_PATH_PROPERTY)
62
+ def get_arrow_native(self) -> afs.SubTreeFileSystem:
63
+ root_fs = afs.LocalFileSystem()
64
+ return afs.SubTreeFileSystem(str(self._root_path), root_fs)
65
+
66
+ def get_file_storage(self) -> IFileStorage:
67
+
68
+ config = cfg.PluginConfig()
69
+ config.protocol = "LOCAL"
70
+ config.properties = self._properties
71
+
72
+ options = dict()
73
+
74
+ return LocalFileStorage(config, options)
75
+
76
+ @classmethod
77
+ def check_root_path(cls, properties, log):
78
+
79
+ root_path_config = _helpers.get_plugin_property(properties, cls.ROOT_PATH_PROPERTY)
43
80
 
44
81
  if not root_path_config or root_path_config.isspace():
45
82
  err = f"Storage root path not set"
46
- self._log.error(err)
83
+ log.error(err)
47
84
  raise ex.EStorageRequest(err)
48
85
 
49
86
  supplied_root = pathlib.Path(root_path_config)
@@ -53,17 +90,58 @@ class LocalFileStorage(IFileStorage):
53
90
 
54
91
  else:
55
92
  err = f"Relative path not allowed for storage root [{supplied_root}]"
56
- self._log.error(err)
93
+ log.error(err)
57
94
  raise ex.EStorageConfig(err)
58
95
 
59
96
  try:
60
- self._root_path = absolute_root.resolve(strict=True)
97
+ return absolute_root.resolve(strict=True)
61
98
 
62
99
  except FileNotFoundError as e:
63
100
  err = f"Storage root path does not exist: [{absolute_root}]"
64
- self._log.error(err)
101
+ log.error(err)
65
102
  raise ex.EStorageRequest(err) from e
66
103
 
104
+
105
+ plugins.PluginManager.register_plugin(IStorageProvider, LocalStorageProvider, ["LOCAL", "file"])
106
+
107
+
108
+ # ----------------------------------------------------------------------------------------------------------------------
109
+ # CUSTOM IMPLEMENTATION FOR LOCAL STORAGE
110
+ # ----------------------------------------------------------------------------------------------------------------------
111
+
112
+ # This is the old implementation that was used before Arrow native was made available
113
+ # It is likely to be removed in a future release
114
+
115
+
116
+ class _StreamResource(tp.BinaryIO): # noqa
117
+
118
+ def __init__(self, ctx_mgr, close_func):
119
+ self.__ctx_mgr = ctx_mgr
120
+ self.__close_func = close_func
121
+
122
+ def __getitem__(self, item):
123
+ return self.__ctx_mgr.__getitem__(item)
124
+
125
+ def __enter__(self):
126
+ return self.__ctx_mgr.__enter__()
127
+
128
+ def __exit__(self, exc_type, exc_val, exc_tb):
129
+ try:
130
+ self.__close_func()
131
+ finally:
132
+ self.__ctx_mgr.__exit__(exc_type, exc_val, exc_tb)
133
+
134
+
135
+ class LocalFileStorage(IFileStorage):
136
+
137
+ def __init__(self, config: cfg.PluginConfig, options: dict = None):
138
+
139
+ self._log = _helpers.logger_for_object(self)
140
+ self._properties = config.properties
141
+ self._options = options # Not used
142
+
143
+ self._root_path = LocalStorageProvider.check_root_path(self._properties, self._log)
144
+
67
145
  def _get_root(self):
68
146
  return self._root_path
69
147
 
@@ -74,13 +152,26 @@ class LocalFileStorage(IFileStorage):
74
152
 
75
153
  def _exists(self, storage_path: str) -> bool:
76
154
 
77
- item_path = self._root_path / storage_path
155
+ item_path = self._resolve_path(storage_path, "EXISTS", True)
78
156
  return item_path.exists()
79
157
 
80
158
  def size(self, storage_path: str) -> int:
81
159
 
82
160
  operation = f"SIZE [{storage_path}]"
83
- return self._error_handling(operation, lambda: self._stat(storage_path).size)
161
+ return self._error_handling(operation, lambda: self._size(storage_path))
162
+
163
+ def _size(self, storage_path: str) -> int:
164
+
165
+ item_path = self._resolve_path(storage_path, "SIZE", True)
166
+
167
+ if not item_path.exists():
168
+ raise ex.EStorageRequest(f"Storage path does not exist: SIZE [{storage_path}]")
169
+
170
+ if not item_path.is_file():
171
+ raise ex.EStorageRequest(f"Storage path is not a file: SIZE [{storage_path}]")
172
+
173
+ os_stat = item_path.stat()
174
+ return os_stat.st_size
84
175
 
85
176
  def stat(self, storage_path: str) -> FileStat:
86
177
 
@@ -89,63 +180,100 @@ class LocalFileStorage(IFileStorage):
89
180
 
90
181
  def _stat(self, storage_path: str) -> FileStat:
91
182
 
92
- item_path = self._root_path / storage_path
183
+ item_path = self._resolve_path(storage_path, "STAT", True)
93
184
  os_stat = item_path.stat()
94
185
 
186
+ return self._os_to_trac_stat(item_path, os_stat)
187
+
188
+ def _os_to_trac_stat(self, item_path: pathlib.Path, os_stat: os.stat_result):
189
+
190
+ file_name = "." if item_path == self._root_path else item_path.name
191
+
95
192
  file_type = FileType.FILE if item_path.is_file() \
96
193
  else FileType.DIRECTORY if item_path.is_dir() \
97
194
  else None
98
195
 
196
+ file_size = 0 if file_type is FileType.DIRECTORY else os_stat.st_size
197
+
99
198
  return FileStat(
199
+ file_name=file_name,
100
200
  file_type=file_type,
101
- size=os_stat.st_size,
102
- ctime=dt.datetime.fromtimestamp(os_stat.st_ctime, dt.timezone.utc),
201
+ storage_path=str(item_path.relative_to(self._root_path).as_posix()),
202
+ size=file_size,
103
203
  mtime=dt.datetime.fromtimestamp(os_stat.st_mtime, dt.timezone.utc),
104
- atime=dt.datetime.fromtimestamp(os_stat.st_atime, dt.timezone.utc),
105
- uid=os_stat.st_uid,
106
- gid=os_stat.st_gid,
107
- mode=os_stat.st_mode)
204
+ atime=dt.datetime.fromtimestamp(os_stat.st_atime, dt.timezone.utc))
108
205
 
109
- def ls(self, storage_path: str) -> tp.List[str]:
206
+ def ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
110
207
 
111
208
  operation = f"LS [{storage_path}]"
112
- return self._error_handling(operation, lambda: self._ls(storage_path))
209
+ return self._error_handling(operation, lambda: self._ls(storage_path, recursive))
113
210
 
114
- def _ls(self, storage_path: str) -> tp.List[str]:
211
+ def _ls(self, storage_path: str, recursive: bool = False) -> tp.List[FileStat]:
115
212
 
116
- item_path = self._root_path / storage_path
117
- return [str(x.relative_to(item_path))
118
- for x in item_path.iterdir()
119
- if x.is_file() or x.is_dir()]
213
+ item_path = self._resolve_path(storage_path, "LS", True)
120
214
 
121
- def mkdir(self, storage_path: str, recursive: bool = False, exists_ok: bool = False):
215
+ if not item_path.exists():
216
+ raise ex.EStorageRequest(f"Storage path does not exist: LS [{storage_path}]")
217
+
218
+ # If LS is called on anything other than a directory, return a listing of that one item
219
+ if not item_path.is_dir():
220
+ os_stat = item_path.stat()
221
+ stat = self._os_to_trac_stat(item_path, os_stat)
222
+ return [stat]
223
+
224
+ # Otherwise do a regular directory listing
225
+ else:
226
+ pattern = "**/*" if recursive else "*"
227
+ paths = list(item_path.glob(pattern))
228
+ return list(map(lambda p: self._os_to_trac_stat(p, p.stat()), paths))
229
+
230
+ def mkdir(self, storage_path: str, recursive: bool = False):
122
231
 
123
232
  operation = f"MKDIR [{storage_path}]"
124
- self._error_handling(operation, lambda: self._mkdir(storage_path, recursive, exists_ok))
233
+ self._error_handling(operation, lambda: self._mkdir(storage_path, recursive))
125
234
 
126
- def _mkdir(self, storage_path: str, recursive: bool = False, exists_ok: bool = False):
235
+ def _mkdir(self, storage_path: str, recursive: bool = False):
127
236
 
128
- item_path = self._root_path / storage_path
129
- item_path.mkdir(parents=recursive, exist_ok=exists_ok)
237
+ item_path = self._resolve_path(storage_path, "MKDIR", False)
238
+ item_path.mkdir(parents=recursive, exist_ok=True)
130
239
 
131
- def rm(self, storage_path: str, recursive: bool = False):
240
+ def rm(self, storage_path: str):
132
241
 
133
- operation = f"MKDIR [{storage_path}]"
134
- self._error_handling(operation, lambda: self._rm(storage_path, recursive))
242
+ operation = f"RM [{storage_path}]"
243
+ self._error_handling(operation, lambda: self._rm(storage_path))
244
+
245
+ def _rm(self, storage_path: str):
246
+
247
+ item_path = self._resolve_path(storage_path, "RM", False)
248
+
249
+ if not item_path.is_file():
250
+ raise ex.EStorageRequest(f"Storage path is not a file: RM [{storage_path}]")
251
+
252
+ item_path.unlink()
253
+
254
+ def rmdir(self, storage_path: str):
255
+
256
+ operation = f"RMDIR [{storage_path}]"
257
+ self._error_handling(operation, lambda: self._rmdir(storage_path))
135
258
 
136
- def _rm(self, storage_path: str, recursive: bool = False):
259
+ def _rmdir(self, storage_path: str):
137
260
 
138
- raise NotImplementedError()
261
+ item_path = self._resolve_path(storage_path, "RMDIR", False)
139
262
 
140
- def read_bytes(self, storage_path: str) -> bytes:
263
+ if not item_path.is_dir():
264
+ raise ex.EStorageRequest(f"Storage path is not a directory: RMDIR [{storage_path}]")
141
265
 
142
- operation = f"READ BYTES [{storage_path}]"
143
- return self._error_handling(operation, lambda: self._read_bytes(storage_path))
266
+ self._rmdir_inner(item_path)
144
267
 
145
- def _read_bytes(self, storage_path: str) -> bytes:
268
+ def _rmdir_inner(self, item_path):
146
269
 
147
- with self.read_byte_stream(storage_path) as stream:
148
- return stream.read()
270
+ for item in item_path.iterdir():
271
+ if item.is_dir():
272
+ self._rmdir_inner(item)
273
+ else:
274
+ item.unlink()
275
+
276
+ item_path.rmdir()
149
277
 
150
278
  def read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
151
279
 
@@ -154,90 +282,97 @@ class LocalFileStorage(IFileStorage):
154
282
 
155
283
  def _read_byte_stream(self, storage_path: str) -> tp.BinaryIO:
156
284
 
157
- operation = f"CLOSE BYTE STREAM (READ) [{storage_path}]"
158
- item_path = self._root_path / storage_path
159
- stream = open(item_path, mode='rb')
285
+ item_path = self._resolve_path(storage_path, "OPEN BYTE STREAM (READ)", False)
160
286
 
161
- return _helpers.log_close(stream, self._log, operation)
287
+ # Do not try to open directories or other non-file objects for reading
288
+ if item_path.exists() and not item_path.is_file():
289
+ raise ex.EStorageRequest(f"Storage path is not a file: OPEN BYTE STREAM (READ) [{storage_path}]")
162
290
 
163
- def write_bytes(self, storage_path: str, data: bytes, overwrite: bool = False):
291
+ stream = open(item_path, mode='rb')
164
292
 
165
- operation = f"WRITE BYTES [{storage_path}]"
166
- self._error_handling(operation, lambda: self._write_bytes(storage_path, data, overwrite))
293
+ return _StreamResource(stream, lambda: self._close_byte_stream(storage_path, stream))
167
294
 
168
- def _write_bytes(self, storage_path: str, data: bytes, overwrite: bool = False):
295
+ def write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
169
296
 
170
- with self.write_byte_stream(storage_path, overwrite) as stream:
171
- stream.write(data)
297
+ operation = f"OPEN BYTE STREAM (WRITE) [{storage_path}]"
298
+ return self._error_handling(operation, lambda: self._write_byte_stream(storage_path))
172
299
 
173
- def write_byte_stream(self, storage_path: str, overwrite: bool = False) -> tp.BinaryIO:
300
+ def _write_byte_stream(self, storage_path: str) -> tp.BinaryIO:
174
301
 
175
- operation = f"OPEN BYTE STREAM (WRITE) [{storage_path}]"
176
- return self._error_handling(operation, lambda: self._write_byte_stream(storage_path, overwrite))
302
+ item_path = self._resolve_path(storage_path, "OPEN BYTE STREAM (WRITE)", False)
177
303
 
178
- def _write_byte_stream(self, storage_path: str, overwrite: bool = False) -> tp.BinaryIO:
304
+ # Make sure the parent dir always exists
305
+ # This brings local storage in line with cloud bucket semantics for writing objects
306
+ if not item_path.parent.exists():
307
+ item_path.parent.mkdir(parents=True, exist_ok=True)
179
308
 
180
- operation = f"CLOSE BYTE STREAM (WRITE) [{storage_path}]"
181
- item_path = self._root_path / storage_path
309
+ # Do not try to open directories or other non-file objects for write
310
+ if item_path.exists() and not item_path.is_file():
311
+ raise ex.EStorageRequest(f"Storage path is not a file: OPEN BYTE STREAM (WRITE) [{storage_path}]")
182
312
 
183
- if overwrite:
184
- stream = open(item_path, mode='wb')
185
- else:
186
- stream = open(item_path, mode='xb')
313
+ # If the file does not already exist and there is an error, try to clean it up
314
+ delete_on_error = not item_path.exists()
187
315
 
188
- return _helpers.log_close(stream, self._log, operation)
316
+ # Always overwrite existing files, this is in line with cloud bucket semantics
317
+ stream = open(item_path, mode='wb')
189
318
 
190
- def read_text(self, storage_path: str, encoding: str = 'utf-8') -> str:
319
+ return _StreamResource(stream, lambda: self._close_byte_stream(storage_path, stream, delete_on_error))
191
320
 
192
- operation = f"READ TEXT [{storage_path}]"
193
- return self._error_handling(operation, lambda: self._read_text(storage_path, encoding))
321
+ def _close_byte_stream(self, storage_path: str, stream: tp.BinaryIO, delete_on_error: bool = False):
194
322
 
195
- def _read_text(self, storage_path: str, encoding: str = 'utf-8') -> str:
323
+ if stream.closed:
324
+ return
196
325
 
197
- with self.read_text_stream(storage_path, encoding) as stream:
198
- return stream.read()
326
+ try:
327
+ read_write = "WRITE" if stream.writable() else "READ"
328
+ self._log.info(f"CLOSE BYTE STREAM ({read_write}) [{storage_path}]")
199
329
 
200
- def read_text_stream(self, storage_path: str, encoding: str = 'utf-8') -> tp.TextIO:
330
+ finally:
331
+ stream.close()
201
332
 
202
- operation = f"OPEN TEXT STREAM (READ) [{storage_path}]"
203
- return self._error_handling(operation, lambda: self._read_text_stream(storage_path, encoding))
333
+ exc_info = sys.exc_info()
334
+ error = exc_info[1] if exc_info is not None else None
204
335
 
205
- def _read_text_stream(self, storage_path: str, encoding: str = 'utf-8') -> tp.TextIO:
336
+ if error is not None and delete_on_error:
337
+ try:
338
+ item_path = self._resolve_path(storage_path, "CLOSE BYTE STREAM (WRITE)", False)
339
+ if item_path.exists():
340
+ item_path.unlink()
341
+ except OSError:
342
+ pass
206
343
 
207
- operation = f"CLOSE TEXT STREAM (READ) [{storage_path}]"
208
- item_path = self._root_path / storage_path
209
- stream = open(item_path, mode='rt', encoding=encoding)
344
+ __T = tp.TypeVar("__T")
210
345
 
211
- return _helpers.log_close(stream, self._log, operation)
346
+ def _resolve_path(self, storage_path: str, operation_name: str, allow_root_dir: bool) -> pathlib.Path:
212
347
 
213
- def write_text(self, storage_path: str, data: str, encoding: str = 'utf-8', overwrite: bool = False):
348
+ try:
214
349
 
215
- operation = f"WRITE TEXT [{storage_path}]"
216
- self._error_handling(operation, lambda: self._write_text(storage_path, data, encoding, overwrite))
350
+ if storage_path is None or len(storage_path.strip()) == 0:
351
+ raise ex.EStorageValidation(f"Storage path is null or blank: {operation_name} [{storage_path}]")
217
352
 
218
- def _write_text(self, storage_path: str, data: str, encoding: str = 'utf-8', overwrite: bool = False):
353
+ if self._ILLEGAL_PATH_CHARS.match(storage_path):
354
+ raise ex.EStorageValidation(f"Storage path is invalid: {operation_name} [{storage_path}]")
219
355
 
220
- with self.write_text_stream(storage_path, encoding, overwrite) as stream:
221
- stream.write(data)
356
+ relative_path = pathlib.Path(storage_path)
222
357
 
223
- def write_text_stream(self, storage_path: str, encoding: str = 'utf-8', overwrite: bool = False) -> tp.TextIO:
358
+ if relative_path.is_absolute():
359
+ raise ex.EStorageValidation(f"Storage path is not relative: {operation_name} [{storage_path}]")
224
360
 
225
- operation = f"OPEN TEXT STREAM (WRITE) [{storage_path}]"
226
- return self._error_handling(operation, lambda: self._write_text_stream(storage_path, encoding, overwrite))
361
+ root_path = self._root_path
362
+ absolute_path = self._root_path.joinpath(relative_path).resolve(False)
227
363
 
228
- def _write_text_stream(self, storage_path: str, encoding: str = 'utf-8', overwrite: bool = False) -> tp.TextIO:
364
+ # is_relative_to only supported in Python 3.9+, we need to support 3.7
365
+ if absolute_path != root_path and root_path not in absolute_path.parents:
366
+ raise ex.EStorageValidation(f"Path is outside storage root: {operation_name} [{storage_path}]")
229
367
 
230
- operation = f"CLOSE TEXT STREAM (WRITE) [{storage_path}]"
231
- item_path = self._root_path / storage_path
368
+ if absolute_path == root_path and not allow_root_dir:
369
+ raise ex.EStorageValidation(f"Illegal operation for storage root: {operation_name} [{storage_path}]")
232
370
 
233
- if overwrite:
234
- stream = open(item_path, mode='wt', encoding=encoding)
235
- else:
236
- stream = open(item_path, mode='xt', encoding=encoding)
371
+ return absolute_path
237
372
 
238
- return _helpers.log_close(stream, self._log, operation)
373
+ except ValueError as e:
239
374
 
240
- __T = tp.TypeVar("__T")
375
+ raise ex.EStorageValidation(f"Storage path is invalid: {operation_name} [{storage_path}]") from e
241
376
 
242
377
  def _error_handling(self, operation: str, func: tp.Callable[[], __T]) -> __T:
243
378
 
@@ -245,6 +380,11 @@ class LocalFileStorage(IFileStorage):
245
380
  self._log.info(operation)
246
381
  return func()
247
382
 
383
+ # ETrac means the error is already handled, log the message as-is
384
+ except ex.ETrac as e:
385
+ self._log.exception(f"{operation} {str(e)}")
386
+ raise
387
+
248
388
  except FileNotFoundError as e:
249
389
  msg = "File not found"
250
390
  self._log.exception(f"{operation}: {msg}")
@@ -255,6 +395,16 @@ class LocalFileStorage(IFileStorage):
255
395
  self._log.exception(f"{operation}: {msg}")
256
396
  raise ex.EStorageRequest(msg) from e
257
397
 
398
+ except IsADirectoryError as e:
399
+ msg = "Path is a directory, not a file"
400
+ self._log.exception(f"{operation}: {msg}")
401
+ raise ex.EStorageRequest(msg) from e
402
+
403
+ except NotADirectoryError as e:
404
+ msg = "Path is not a directory"
405
+ self._log.exception(f"{operation}: {msg}")
406
+ raise ex.EStorageRequest(msg) from e
407
+
258
408
  except PermissionError as e:
259
409
  msg = "Access denied"
260
410
  self._log.exception(f"{operation}: {msg}")
@@ -265,5 +415,6 @@ class LocalFileStorage(IFileStorage):
265
415
  self._log.exception(f"{operation}: {msg}")
266
416
  raise ex.EStorageAccess(msg) from e
267
417
 
268
-
269
- _storage.StorageManager.register_storage_type("LOCAL", LocalFileStorage, _storage.CommonDataStorage)
418
+ _ILLEGAL_PATH_CHARS_WINDOWS = re.compile(r".*[\x00<>:\"\'|?*].*")
419
+ _ILLEGAL_PATH_CHARS_POSIX = re.compile(r".*[\x00<>:\"\'|?*\\].*")
420
+ _ILLEGAL_PATH_CHARS = _ILLEGAL_PATH_CHARS_WINDOWS if _helpers.is_windows() else _ILLEGAL_PATH_CHARS_POSIX