tensorbored 2.21.0rc1769983804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorbored/__init__.py +112 -0
- tensorbored/_vendor/__init__.py +0 -0
- tensorbored/_vendor/bleach/__init__.py +125 -0
- tensorbored/_vendor/bleach/_vendor/__init__.py +0 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/__init__.py +35 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_ihatexml.py +289 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_inputstream.py +918 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_tokenizer.py +1735 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/__init__.py +5 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/_base.py +40 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/py.py +67 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_utils.py +159 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/constants.py +2946 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/__init__.py +0 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/alphabeticalattributes.py +29 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/base.py +12 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/inject_meta_charset.py +73 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/lint.py +93 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/optionaltags.py +207 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/sanitizer.py +916 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/whitespace.py +38 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/html5parser.py +2795 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/serializer.py +409 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/__init__.py +30 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/genshi.py +54 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/sax.py +50 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/__init__.py +88 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/base.py +417 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/dom.py +239 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree.py +343 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree_lxml.py +392 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/__init__.py +154 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/base.py +252 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/dom.py +43 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree.py +131 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree_lxml.py +215 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/genshi.py +69 -0
- tensorbored/_vendor/bleach/_vendor/parse.py +1078 -0
- tensorbored/_vendor/bleach/callbacks.py +32 -0
- tensorbored/_vendor/bleach/html5lib_shim.py +757 -0
- tensorbored/_vendor/bleach/linkifier.py +633 -0
- tensorbored/_vendor/bleach/parse_shim.py +1 -0
- tensorbored/_vendor/bleach/sanitizer.py +638 -0
- tensorbored/_vendor/bleach/six_shim.py +19 -0
- tensorbored/_vendor/webencodings/__init__.py +342 -0
- tensorbored/_vendor/webencodings/labels.py +231 -0
- tensorbored/_vendor/webencodings/mklabels.py +59 -0
- tensorbored/_vendor/webencodings/x_user_defined.py +325 -0
- tensorbored/assets.py +36 -0
- tensorbored/auth.py +102 -0
- tensorbored/backend/__init__.py +0 -0
- tensorbored/backend/application.py +604 -0
- tensorbored/backend/auth_context_middleware.py +38 -0
- tensorbored/backend/client_feature_flags.py +113 -0
- tensorbored/backend/empty_path_redirect.py +46 -0
- tensorbored/backend/event_processing/__init__.py +0 -0
- tensorbored/backend/event_processing/data_ingester.py +276 -0
- tensorbored/backend/event_processing/data_provider.py +535 -0
- tensorbored/backend/event_processing/directory_loader.py +142 -0
- tensorbored/backend/event_processing/directory_watcher.py +272 -0
- tensorbored/backend/event_processing/event_accumulator.py +950 -0
- tensorbored/backend/event_processing/event_file_inspector.py +463 -0
- tensorbored/backend/event_processing/event_file_loader.py +292 -0
- tensorbored/backend/event_processing/event_multiplexer.py +521 -0
- tensorbored/backend/event_processing/event_util.py +68 -0
- tensorbored/backend/event_processing/io_wrapper.py +223 -0
- tensorbored/backend/event_processing/plugin_asset_util.py +104 -0
- tensorbored/backend/event_processing/plugin_event_accumulator.py +721 -0
- tensorbored/backend/event_processing/plugin_event_multiplexer.py +522 -0
- tensorbored/backend/event_processing/reservoir.py +266 -0
- tensorbored/backend/event_processing/tag_types.py +29 -0
- tensorbored/backend/experiment_id.py +71 -0
- tensorbored/backend/experimental_plugin.py +51 -0
- tensorbored/backend/http_util.py +263 -0
- tensorbored/backend/json_util.py +70 -0
- tensorbored/backend/path_prefix.py +67 -0
- tensorbored/backend/process_graph.py +74 -0
- tensorbored/backend/security_validator.py +202 -0
- tensorbored/compat/__init__.py +69 -0
- tensorbored/compat/proto/__init__.py +0 -0
- tensorbored/compat/proto/allocation_description_pb2.py +35 -0
- tensorbored/compat/proto/api_def_pb2.py +82 -0
- tensorbored/compat/proto/attr_value_pb2.py +80 -0
- tensorbored/compat/proto/cluster_pb2.py +58 -0
- tensorbored/compat/proto/config_pb2.py +271 -0
- tensorbored/compat/proto/coordination_config_pb2.py +45 -0
- tensorbored/compat/proto/cost_graph_pb2.py +87 -0
- tensorbored/compat/proto/cpp_shape_inference_pb2.py +70 -0
- tensorbored/compat/proto/debug_pb2.py +65 -0
- tensorbored/compat/proto/event_pb2.py +149 -0
- tensorbored/compat/proto/full_type_pb2.py +74 -0
- tensorbored/compat/proto/function_pb2.py +157 -0
- tensorbored/compat/proto/graph_debug_info_pb2.py +111 -0
- tensorbored/compat/proto/graph_pb2.py +41 -0
- tensorbored/compat/proto/histogram_pb2.py +39 -0
- tensorbored/compat/proto/meta_graph_pb2.py +254 -0
- tensorbored/compat/proto/node_def_pb2.py +61 -0
- tensorbored/compat/proto/op_def_pb2.py +81 -0
- tensorbored/compat/proto/resource_handle_pb2.py +48 -0
- tensorbored/compat/proto/rewriter_config_pb2.py +93 -0
- tensorbored/compat/proto/rpc_options_pb2.py +35 -0
- tensorbored/compat/proto/saved_object_graph_pb2.py +193 -0
- tensorbored/compat/proto/saver_pb2.py +38 -0
- tensorbored/compat/proto/step_stats_pb2.py +116 -0
- tensorbored/compat/proto/struct_pb2.py +144 -0
- tensorbored/compat/proto/summary_pb2.py +111 -0
- tensorbored/compat/proto/tensor_description_pb2.py +38 -0
- tensorbored/compat/proto/tensor_pb2.py +68 -0
- tensorbored/compat/proto/tensor_shape_pb2.py +46 -0
- tensorbored/compat/proto/tfprof_log_pb2.py +307 -0
- tensorbored/compat/proto/trackable_object_graph_pb2.py +90 -0
- tensorbored/compat/proto/types_pb2.py +105 -0
- tensorbored/compat/proto/variable_pb2.py +62 -0
- tensorbored/compat/proto/verifier_config_pb2.py +38 -0
- tensorbored/compat/proto/versions_pb2.py +35 -0
- tensorbored/compat/tensorflow_stub/__init__.py +38 -0
- tensorbored/compat/tensorflow_stub/app.py +124 -0
- tensorbored/compat/tensorflow_stub/compat/__init__.py +131 -0
- tensorbored/compat/tensorflow_stub/compat/v1/__init__.py +20 -0
- tensorbored/compat/tensorflow_stub/dtypes.py +692 -0
- tensorbored/compat/tensorflow_stub/error_codes.py +169 -0
- tensorbored/compat/tensorflow_stub/errors.py +507 -0
- tensorbored/compat/tensorflow_stub/flags.py +124 -0
- tensorbored/compat/tensorflow_stub/io/__init__.py +17 -0
- tensorbored/compat/tensorflow_stub/io/gfile.py +1011 -0
- tensorbored/compat/tensorflow_stub/pywrap_tensorflow.py +285 -0
- tensorbored/compat/tensorflow_stub/tensor_shape.py +1035 -0
- tensorbored/context.py +129 -0
- tensorbored/data/__init__.py +0 -0
- tensorbored/data/grpc_provider.py +365 -0
- tensorbored/data/ingester.py +46 -0
- tensorbored/data/proto/__init__.py +0 -0
- tensorbored/data/proto/data_provider_pb2.py +517 -0
- tensorbored/data/proto/data_provider_pb2_grpc.py +374 -0
- tensorbored/data/provider.py +1365 -0
- tensorbored/data/server_ingester.py +301 -0
- tensorbored/data_compat.py +159 -0
- tensorbored/dataclass_compat.py +224 -0
- tensorbored/default.py +124 -0
- tensorbored/errors.py +130 -0
- tensorbored/lazy.py +99 -0
- tensorbored/main.py +48 -0
- tensorbored/main_lib.py +62 -0
- tensorbored/manager.py +487 -0
- tensorbored/notebook.py +441 -0
- tensorbored/plugin_util.py +266 -0
- tensorbored/plugins/__init__.py +0 -0
- tensorbored/plugins/audio/__init__.py +0 -0
- tensorbored/plugins/audio/audio_plugin.py +229 -0
- tensorbored/plugins/audio/metadata.py +69 -0
- tensorbored/plugins/audio/plugin_data_pb2.py +37 -0
- tensorbored/plugins/audio/summary.py +230 -0
- tensorbored/plugins/audio/summary_v2.py +124 -0
- tensorbored/plugins/base_plugin.py +367 -0
- tensorbored/plugins/core/__init__.py +0 -0
- tensorbored/plugins/core/core_plugin.py +981 -0
- tensorbored/plugins/custom_scalar/__init__.py +0 -0
- tensorbored/plugins/custom_scalar/custom_scalars_plugin.py +320 -0
- tensorbored/plugins/custom_scalar/layout_pb2.py +85 -0
- tensorbored/plugins/custom_scalar/metadata.py +35 -0
- tensorbored/plugins/custom_scalar/summary.py +79 -0
- tensorbored/plugins/debugger_v2/__init__.py +0 -0
- tensorbored/plugins/debugger_v2/debug_data_multiplexer.py +631 -0
- tensorbored/plugins/debugger_v2/debug_data_provider.py +634 -0
- tensorbored/plugins/debugger_v2/debugger_v2_plugin.py +504 -0
- tensorbored/plugins/distribution/__init__.py +0 -0
- tensorbored/plugins/distribution/compressor.py +158 -0
- tensorbored/plugins/distribution/distributions_plugin.py +116 -0
- tensorbored/plugins/distribution/metadata.py +19 -0
- tensorbored/plugins/graph/__init__.py +0 -0
- tensorbored/plugins/graph/graph_util.py +129 -0
- tensorbored/plugins/graph/graphs_plugin.py +336 -0
- tensorbored/plugins/graph/keras_util.py +328 -0
- tensorbored/plugins/graph/metadata.py +42 -0
- tensorbored/plugins/histogram/__init__.py +0 -0
- tensorbored/plugins/histogram/histograms_plugin.py +144 -0
- tensorbored/plugins/histogram/metadata.py +63 -0
- tensorbored/plugins/histogram/plugin_data_pb2.py +34 -0
- tensorbored/plugins/histogram/summary.py +234 -0
- tensorbored/plugins/histogram/summary_v2.py +292 -0
- tensorbored/plugins/hparams/__init__.py +14 -0
- tensorbored/plugins/hparams/_keras.py +93 -0
- tensorbored/plugins/hparams/api.py +130 -0
- tensorbored/plugins/hparams/api_pb2.py +208 -0
- tensorbored/plugins/hparams/backend_context.py +606 -0
- tensorbored/plugins/hparams/download_data.py +158 -0
- tensorbored/plugins/hparams/error.py +26 -0
- tensorbored/plugins/hparams/get_experiment.py +71 -0
- tensorbored/plugins/hparams/hparams_plugin.py +206 -0
- tensorbored/plugins/hparams/hparams_util_pb2.py +69 -0
- tensorbored/plugins/hparams/json_format_compat.py +38 -0
- tensorbored/plugins/hparams/list_metric_evals.py +57 -0
- tensorbored/plugins/hparams/list_session_groups.py +1040 -0
- tensorbored/plugins/hparams/metadata.py +125 -0
- tensorbored/plugins/hparams/metrics.py +41 -0
- tensorbored/plugins/hparams/plugin_data_pb2.py +69 -0
- tensorbored/plugins/hparams/summary.py +205 -0
- tensorbored/plugins/hparams/summary_v2.py +597 -0
- tensorbored/plugins/image/__init__.py +0 -0
- tensorbored/plugins/image/images_plugin.py +232 -0
- tensorbored/plugins/image/metadata.py +65 -0
- tensorbored/plugins/image/plugin_data_pb2.py +34 -0
- tensorbored/plugins/image/summary.py +159 -0
- tensorbored/plugins/image/summary_v2.py +130 -0
- tensorbored/plugins/mesh/__init__.py +14 -0
- tensorbored/plugins/mesh/mesh_plugin.py +292 -0
- tensorbored/plugins/mesh/metadata.py +152 -0
- tensorbored/plugins/mesh/plugin_data_pb2.py +37 -0
- tensorbored/plugins/mesh/summary.py +251 -0
- tensorbored/plugins/mesh/summary_v2.py +214 -0
- tensorbored/plugins/metrics/__init__.py +0 -0
- tensorbored/plugins/metrics/metadata.py +17 -0
- tensorbored/plugins/metrics/metrics_plugin.py +623 -0
- tensorbored/plugins/pr_curve/__init__.py +0 -0
- tensorbored/plugins/pr_curve/metadata.py +75 -0
- tensorbored/plugins/pr_curve/plugin_data_pb2.py +34 -0
- tensorbored/plugins/pr_curve/pr_curves_plugin.py +241 -0
- tensorbored/plugins/pr_curve/summary.py +574 -0
- tensorbored/plugins/profile_redirect/__init__.py +0 -0
- tensorbored/plugins/profile_redirect/profile_redirect_plugin.py +49 -0
- tensorbored/plugins/projector/__init__.py +67 -0
- tensorbored/plugins/projector/metadata.py +26 -0
- tensorbored/plugins/projector/projector_config_pb2.py +54 -0
- tensorbored/plugins/projector/projector_plugin.py +795 -0
- tensorbored/plugins/projector/tf_projector_plugin/index.js +32 -0
- tensorbored/plugins/projector/tf_projector_plugin/projector_binary.html +524 -0
- tensorbored/plugins/projector/tf_projector_plugin/projector_binary.js +15536 -0
- tensorbored/plugins/scalar/__init__.py +0 -0
- tensorbored/plugins/scalar/metadata.py +60 -0
- tensorbored/plugins/scalar/plugin_data_pb2.py +34 -0
- tensorbored/plugins/scalar/scalars_plugin.py +181 -0
- tensorbored/plugins/scalar/summary.py +109 -0
- tensorbored/plugins/scalar/summary_v2.py +124 -0
- tensorbored/plugins/text/__init__.py +0 -0
- tensorbored/plugins/text/metadata.py +62 -0
- tensorbored/plugins/text/plugin_data_pb2.py +34 -0
- tensorbored/plugins/text/summary.py +114 -0
- tensorbored/plugins/text/summary_v2.py +124 -0
- tensorbored/plugins/text/text_plugin.py +288 -0
- tensorbored/plugins/wit_redirect/__init__.py +0 -0
- tensorbored/plugins/wit_redirect/wit_redirect_plugin.py +49 -0
- tensorbored/program.py +910 -0
- tensorbored/summary/__init__.py +35 -0
- tensorbored/summary/_output.py +124 -0
- tensorbored/summary/_tf/__init__.py +14 -0
- tensorbored/summary/_tf/summary/__init__.py +178 -0
- tensorbored/summary/_writer.py +105 -0
- tensorbored/summary/v1.py +51 -0
- tensorbored/summary/v2.py +25 -0
- tensorbored/summary/writer/__init__.py +13 -0
- tensorbored/summary/writer/event_file_writer.py +291 -0
- tensorbored/summary/writer/record_writer.py +50 -0
- tensorbored/util/__init__.py +0 -0
- tensorbored/util/encoder.py +116 -0
- tensorbored/util/grpc_util.py +311 -0
- tensorbored/util/img_mime_type_detector.py +40 -0
- tensorbored/util/io_util.py +20 -0
- tensorbored/util/lazy_tensor_creator.py +110 -0
- tensorbored/util/op_evaluator.py +104 -0
- tensorbored/util/platform_util.py +20 -0
- tensorbored/util/tb_logging.py +24 -0
- tensorbored/util/tensor_util.py +617 -0
- tensorbored/util/timing.py +122 -0
- tensorbored/version.py +21 -0
- tensorbored/webfiles.zip +0 -0
- tensorbored-2.21.0rc1769983804.dist-info/METADATA +49 -0
- tensorbored-2.21.0rc1769983804.dist-info/RECORD +271 -0
- tensorbored-2.21.0rc1769983804.dist-info/WHEEL +5 -0
- tensorbored-2.21.0rc1769983804.dist-info/entry_points.txt +6 -0
- tensorbored-2.21.0rc1769983804.dist-info/licenses/LICENSE +739 -0
- tensorbored-2.21.0rc1769983804.dist-info/top_level.txt +1 -0
tensorbored/compat/tensorflow_stub/io/gfile.py
@@ -0,0 +1,1011 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A limited reimplementation of the TensorFlow FileIO API.

The TensorFlow version wraps the C++ FileSystem API. Here we provide a
pure Python implementation, limited to the features required for
TensorBoard. This allows running TensorBoard without depending on
TensorFlow for file operations.
"""

import dataclasses
import glob as py_glob
import io
import os
import os.path
import sys
import tempfile

try:
    import botocore.exceptions
    import boto3

    S3_ENABLED = True
except ImportError:
    S3_ENABLED = False

try:
    import fsspec

    FSSPEC_ENABLED = True
except ImportError:
    FSSPEC_ENABLED = False

if sys.version_info < (3, 0):
    # In Python 2 FileExistsError is not defined and the
    # error manifests as OSError.
    FileExistsError = OSError

from tensorbored.compat.tensorflow_stub import compat, errors

# A good default block size depends on the system in question.
# A somewhat conservative default is chosen here.
_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024


# Registry of filesystems by prefix.
#
# Currently supports "s3://" URLs for S3 based on boto3 and falls
# back to the local filesystem.
_REGISTERED_FILESYSTEMS = {}


def register_filesystem(prefix, filesystem):
    if ":" in prefix:
        raise ValueError("Filesystem prefix cannot contain a :")
    _REGISTERED_FILESYSTEMS[prefix] = filesystem


def get_filesystem(filename):
    """Return the registered filesystem for the given file."""
    filename = compat.as_str_any(filename)
    prefix = ""
    index = filename.find("://")
    if index >= 0:
        prefix = filename[:index]
    fs = _REGISTERED_FILESYSTEMS.get(prefix, None)
    if fs is None:
        fs = _get_fsspec_filesystem(filename)
    if fs is None:
        raise ValueError("No recognized filesystem for prefix %s" % prefix)
    return fs

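
# Illustrative sketch (not part of the packaged module): how the prefix
# registry above resolves a path. `register_filesystem("", ...)` and, when
# boto3 is available, `register_filesystem("s3", ...)` run at import time
# near the end of this file; the paths below are hypothetical.
def _example_prefix_lookup():
    local_fs = get_filesystem("/tmp/logs")  # no "://", so prefix "" matches
    s3_fs = get_filesystem("s3://bucket/run1")  # prefix "s3" matches
    return local_fs, s3_fs
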
@dataclasses.dataclass(frozen=True)
class StatData:
    """Data returned from the Stat call.

    Attributes:
        length: Length of the data content.
    """

    length: int


class LocalFileSystem:
    """Provides local filesystem access."""

    def exists(self, filename):
        """Determines whether a path exists or not."""
        return os.path.exists(compat.as_bytes(filename))

    def join(self, path, *paths):
        """Join paths with path delimiter."""
        return os.path.join(path, *paths)

    def read(self, filename, binary_mode=False, size=None, continue_from=None):
        """Reads contents of a file to a string.

        Args:
            filename: string, a path
            binary_mode: bool, read as binary if True, otherwise text
            size: int, number of bytes or characters to read, otherwise
                read all the contents of the file (from the continuation
                marker, if present).
            continue_from: An opaque value returned from a prior invocation of
                `read(...)` marking the last read position, so that reading
                may continue from there. Otherwise read from the beginning.

        Returns:
            A tuple of `(data, continuation_token)` where `data` provides either
            bytes read from the file (if `binary_mode == True`) or the decoded
            string representation thereof (otherwise), and `continuation_token`
            is an opaque value that can be passed to the next invocation of
            `read(...)` in order to continue from the last read position.
        """
        mode = "rb" if binary_mode else "r"
        encoding = None if binary_mode else "utf8"
        if not exists(filename):
            raise errors.NotFoundError(
                None, None, "Not Found: " + compat.as_text(filename)
            )
        offset = None
        if continue_from is not None:
            offset = continue_from.get("opaque_offset", None)
        with io.open(filename, mode, encoding=encoding) as f:
            if offset is not None:
                f.seek(offset)
            data = f.read(size)
            # The new offset may not be `offset + len(data)`, due to decoding
            # and newline translation, so just measure it in whatever terms
            # the underlying stream uses.
            continuation_token = {"opaque_offset": f.tell()}
            return (data, continuation_token)

    def write(self, filename, file_content, binary_mode=False):
        """Writes string file contents to a file, overwriting any existing
        contents.

        Args:
            filename: string, a path
            file_content: string, the contents
            binary_mode: bool, write as binary if True, otherwise text
        """
        self._write(filename, file_content, "wb" if binary_mode else "w")

    def append(self, filename, file_content, binary_mode=False):
        """Append string file contents to a file.

        Args:
            filename: string, a path
            file_content: string, the contents to append
            binary_mode: bool, write as binary if True, otherwise text
        """
        self._write(filename, file_content, "ab" if binary_mode else "a")

    def _write(self, filename, file_content, mode):
        encoding = None if "b" in mode else "utf8"
        with io.open(filename, mode, encoding=encoding) as f:
            compatify = compat.as_bytes if "b" in mode else compat.as_text
            f.write(compatify(file_content))

    def glob(self, filename):
        """Returns a list of files that match the given pattern(s)."""
        if isinstance(filename, str):
            return [
                # Convert the filenames to string from bytes.
                compat.as_str_any(matching_filename)
                for matching_filename in py_glob.glob(compat.as_bytes(filename))
            ]
        else:
            return [
                # Convert the filenames to string from bytes.
                compat.as_str_any(matching_filename)
                for single_filename in filename
                for matching_filename in py_glob.glob(
                    compat.as_bytes(single_filename)
                )
            ]

    def isdir(self, dirname):
        """Returns whether the path is a directory or not."""
        return os.path.isdir(compat.as_bytes(dirname))

    def listdir(self, dirname):
        """Returns a list of entries contained within a directory."""
        if not self.isdir(dirname):
            raise errors.NotFoundError(None, None, "Could not find directory")

        entries = os.listdir(compat.as_str_any(dirname))
        entries = [compat.as_str_any(item) for item in entries]
        return entries

    def makedirs(self, path):
        """Creates a directory and all parent/intermediate directories."""
        os.makedirs(path, exist_ok=True)

    def stat(self, filename):
        """Returns file statistics for a given path."""
        # NOTE: The size of the file is given by .st_size as returned from
        # os.stat(), but we convert it to .length.
        try:
            file_length = os.stat(compat.as_bytes(filename)).st_size
        except OSError:
            raise errors.NotFoundError(None, None, "Could not find file")
        return StatData(file_length)


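# Illustrative sketch (not part of the packaged module): the continuation
# token protocol implemented by `LocalFileSystem.read` above. Each call
# returns data plus an opaque token; passing the token back resumes reading
# where the previous call stopped. The path is hypothetical.
def _example_continuation_read(path="/tmp/example.log"):
    fs = LocalFileSystem()
    head, token = fs.read(path, binary_mode=True, size=64)
    tail, _ = fs.read(path, binary_mode=True, continue_from=token)
    return head + tail  # together: the file's full contents

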
class S3FileSystem:
    """Provides filesystem access to S3."""

    def __init__(self):
        # S3_ENABLED is always defined, unlike the boto3 name, which is
        # unbound when the import at the top of this module failed.
        if not S3_ENABLED:
            raise ImportError("boto3 must be installed for S3 support.")
        self._s3_endpoint = os.environ.get("S3_ENDPOINT", None)

    def bucket_and_path(self, url):
        """Split an S3-prefixed URL into bucket and path."""
        url = compat.as_str_any(url)
        if url.startswith("s3://"):
            url = url[len("s3://") :]
        idx = url.index("/")
        bucket = url[:idx]
        path = url[(idx + 1) :]
        return bucket, path

    def exists(self, filename):
        """Determines whether a path exists or not."""
        client = boto3.client("s3", endpoint_url=self._s3_endpoint)
        bucket, path = self.bucket_and_path(filename)
        r = client.list_objects(Bucket=bucket, Prefix=path, Delimiter="/")
        if r.get("Contents") or r.get("CommonPrefixes"):
            return True
        return False

    def join(self, path, *paths):
        """Join paths with a slash."""
        return "/".join((path,) + paths)

    def read(self, filename, binary_mode=False, size=None, continue_from=None):
        """Reads contents of a file to a string.

        Args:
            filename: string, a path
            binary_mode: bool, read as binary if True, otherwise text
            size: int, number of bytes or characters to read, otherwise
                read all the contents of the file (from the continuation
                marker, if present).
            continue_from: An opaque value returned from a prior invocation of
                `read(...)` marking the last read position, so that reading
                may continue from there. Otherwise read from the beginning.

        Returns:
            A tuple of `(data, continuation_token)` where `data` provides either
            bytes read from the file (if `binary_mode == True`) or the decoded
            string representation thereof (otherwise), and `continuation_token`
            is an opaque value that can be passed to the next invocation of
            `read(...)` in order to continue from the last read position.
        """
        s3 = boto3.resource("s3", endpoint_url=self._s3_endpoint)
        bucket, path = self.bucket_and_path(filename)
        args = {}

        # For the S3 case, we use continuation tokens of the form
        # {byte_offset: number}
        offset = 0
        if continue_from is not None:
            offset = continue_from.get("byte_offset", 0)

        endpoint = ""
        if size is not None:
            # TODO(orionr): This endpoint risks splitting a multi-byte
            # character or splitting \r and \n in the case of CRLFs,
            # producing decoding errors below.
            endpoint = offset + size

        if offset != 0 or endpoint != "":
            # Asked for a range, so modify the request
            args["Range"] = "bytes={}-{}".format(offset, endpoint)

        try:
            stream = s3.Object(bucket, path).get(**args)["Body"].read()
        except botocore.exceptions.ClientError as exc:
            if exc.response["Error"]["Code"] in ["416", "InvalidRange"]:
                if size is not None:
                    # Asked for too much, so request just to the end. Do this
                    # in a second request so we don't check length in all cases.
                    client = boto3.client("s3", endpoint_url=self._s3_endpoint)
                    obj = client.head_object(Bucket=bucket, Key=path)
                    content_length = obj["ContentLength"]
                    endpoint = min(content_length, offset + size)
                if offset == endpoint:
                    # Asked for no bytes, so just return empty
                    stream = b""
                else:
                    args["Range"] = "bytes={}-{}".format(offset, endpoint)
                    stream = s3.Object(bucket, path).get(**args)["Body"].read()
            else:
                raise
        # `stream` should contain raw bytes here (i.e., there has been neither
        # decoding nor newline translation), so the byte offset increases by
        # the expected amount.
        continuation_token = {"byte_offset": (offset + len(stream))}
        if binary_mode:
            return (bytes(stream), continuation_token)
        else:
            return (stream.decode("utf-8"), continuation_token)

    def write(self, filename, file_content, binary_mode=False):
        """Writes string file contents to a file.

        Args:
            filename: string, a path
            file_content: string, the contents
            binary_mode: bool, write as binary if True, otherwise text
        """
        client = boto3.client("s3", endpoint_url=self._s3_endpoint)
        bucket, path = self.bucket_and_path(filename)
        # Always convert to bytes for writing
        if binary_mode:
            if not isinstance(file_content, bytes):
                raise TypeError("File content type must be bytes")
        else:
            file_content = compat.as_bytes(file_content)
        client.put_object(Body=file_content, Bucket=bucket, Key=path)

    def glob(self, filename):
        """Returns a list of files that match the given pattern(s)."""
        # Only support prefix with * at the end and no ? in the string
        star_i = filename.find("*")
        quest_i = filename.find("?")
        if quest_i >= 0:
            raise NotImplementedError(
                "{} not supported by compat glob".format(filename)
            )
        if star_i != len(filename) - 1:
            # Just return empty so we can use glob from directory watcher
            #
            # TODO: Remove and instead handle in GetLogdirSubdirectories.
            # However, we would need to handle it for all non-local registered
            # filesystems in some way.
            return []
        filename = filename[:-1]
        client = boto3.client("s3", endpoint_url=self._s3_endpoint)
        bucket, path = self.bucket_and_path(filename)
        p = client.get_paginator("list_objects")
        keys = []
        for r in p.paginate(Bucket=bucket, Prefix=path):
            for o in r.get("Contents", []):
                key = o["Key"][len(path) :]
                if key:  # Skip the base dir, which would add an empty string
                    keys.append(filename + key)
        return keys

    def isdir(self, dirname):
        """Returns whether the path is a directory or not."""
        client = boto3.client("s3", endpoint_url=self._s3_endpoint)
        bucket, path = self.bucket_and_path(dirname)
        if not path.endswith("/"):
            path += "/"  # This will now only retrieve subdir content
        r = client.list_objects(Bucket=bucket, Prefix=path, Delimiter="/")
        if r.get("Contents") or r.get("CommonPrefixes"):
            return True
        return False

    def listdir(self, dirname):
        """Returns a list of entries contained within a directory."""
        client = boto3.client("s3", endpoint_url=self._s3_endpoint)
        bucket, path = self.bucket_and_path(dirname)
        p = client.get_paginator("list_objects")
        if not path.endswith("/"):
            path += "/"  # This will now only retrieve subdir content
        keys = []
        for r in p.paginate(Bucket=bucket, Prefix=path, Delimiter="/"):
            keys.extend(
                o["Prefix"][len(path) : -1] for o in r.get("CommonPrefixes", [])
            )
            for o in r.get("Contents", []):
                key = o["Key"][len(path) :]
                if key:  # Skip the base dir, which would add an empty string
                    keys.append(key)
        return keys

    def makedirs(self, dirname):
        """Creates a directory and all parent/intermediate directories."""
        if not self.exists(dirname):
            client = boto3.client("s3", endpoint_url=self._s3_endpoint)
            bucket, path = self.bucket_and_path(dirname)
            if not path.endswith("/"):
                path += "/"  # This will make sure we don't override a file
            client.put_object(Body="", Bucket=bucket, Key=path)

    def stat(self, filename):
        """Returns file statistics for a given path."""
        # NOTE: The size of the file is given by ContentLength from S3,
        # but we convert it to .length.
        client = boto3.client("s3", endpoint_url=self._s3_endpoint)
        bucket, path = self.bucket_and_path(filename)
        try:
            obj = client.head_object(Bucket=bucket, Key=path)
            return StatData(obj["ContentLength"])
        except botocore.exceptions.ClientError as exc:
            if exc.response["Error"]["Code"] == "404":
                raise errors.NotFoundError(None, None, "Could not find file")
            else:
                raise


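# Illustrative sketch (not part of the packaged module): `S3FileSystem.read`
# above maps the continuation token onto an HTTP Range request, so the token
# is just a byte offset. Requires boto3 plus credentials; the bucket and key
# names are hypothetical.
def _example_s3_ranged_read():
    fs = S3FileSystem()
    chunk, token = fs.read("s3://bucket/events.out", binary_mode=True, size=1024)
    # token == {"byte_offset": <end of the first chunk>}
    return fs.read("s3://bucket/events.out", binary_mode=True, continue_from=token)

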
class FSSpecFileSystem:
    """Provides filesystem access via fsspec.

    The current gfile interface doesn't map perfectly to the fsspec interface,
    leading to some notable inefficiencies:

    * Reads and writes to files cause the file to be reopened each time, which
      can cause a performance hit when accessing local file systems.
    * walk doesn't use the native fsspec walk function, so performance may be
      slower.

    See https://github.com/tensorflow/tensorboard/issues/5286 for more info on
    limitations.
    """

    SEPARATOR = "://"
    CHAIN_SEPARATOR = "::"

    def _validate_path(self, path):
        parts = path.split(self.CHAIN_SEPARATOR)
        for part in parts[:-1]:
            if self.SEPARATOR in part:
                raise errors.InvalidArgumentError(
                    None,
                    None,
                    "fsspec URL must only have paths in the last chained filesystem, got {}".format(
                        path
                    ),
                )

    def _translate_errors(func):
        def func_wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except FileNotFoundError as e:
                raise errors.NotFoundError(None, None, str(e))

        return func_wrapper

    def _fs_path(self, filename):
        if isinstance(filename, bytes):
            filename = filename.decode("utf-8")
        self._validate_path(filename)

        fs, path = fsspec.core.url_to_fs(filename)
        return fs, path

    @_translate_errors
    def exists(self, filename):
        """Determines whether a path exists or not."""
        fs, path = self._fs_path(filename)
        return fs.exists(path)

    def _join(self, sep, paths):
        """Joins the paths with the given separator."""
        result = []
        for part in paths:
            if part.startswith(sep):
                result = []
            if result and result[-1] and not result[-1].endswith(sep):
                result.append(sep)
            result.append(part)
        return "".join(result)

    @_translate_errors
    def join(self, path, *paths):
        """Join paths with a slash."""
        self._validate_path(path)

        before, sep, last_path = path.rpartition(self.CHAIN_SEPARATOR)
        chain_prefix = before + sep
        protocol, path = fsspec.core.split_protocol(last_path)
        fs = fsspec.get_filesystem_class(protocol)
        if protocol:
            chain_prefix += protocol + self.SEPARATOR
        return chain_prefix + self._join(fs.sep, ((path,) + paths))

    @_translate_errors
    def read(self, filename, binary_mode=False, size=None, continue_from=None):
        """Reads contents of a file to a string.

        Args:
            filename: string, a path
            binary_mode: bool, read as binary if True, otherwise text
            size: int, number of bytes or characters to read, otherwise
                read all the contents of the file (from the continuation
                marker, if present).
            continue_from: An opaque value returned from a prior invocation of
                `read(...)` marking the last read position, so that reading
                may continue from there. Otherwise read from the beginning.

        Returns:
            A tuple of `(data, continuation_token)` where `data` provides either
            bytes read from the file (if `binary_mode == True`) or the decoded
            string representation thereof (otherwise), and `continuation_token`
            is an opaque value that can be passed to the next invocation of
            `read(...)` in order to continue from the last read position.
        """
        fs, path = self._fs_path(filename)

        mode = "rb" if binary_mode else "r"
        encoding = None if binary_mode else "utf8"
        if not exists(filename):
            raise errors.NotFoundError(
                None, None, "Not Found: " + compat.as_text(filename)
            )
        with fs.open(path, mode, encoding=encoding) as f:
            if continue_from is not None:
                if not f.seekable():
                    raise errors.InvalidArgumentError(
                        None,
                        None,
                        "{} is not seekable".format(filename),
                    )
                offset = continue_from.get("opaque_offset", None)
                if offset is not None:
                    f.seek(offset)

            data = f.read(size)
            # The new offset may not be `offset + len(data)`, due to decoding
            # and newline translation, so just measure it in whatever terms
            # the underlying stream uses.
            continuation_token = (
                {"opaque_offset": f.tell()} if f.seekable() else {}
            )
            return (data, continuation_token)

    @_translate_errors
    def write(self, filename, file_content, binary_mode=False):
        """Writes string file contents to a file.

        Args:
            filename: string, a path
            file_content: string, the contents
            binary_mode: bool, write as binary if True, otherwise text
        """
        self._write(filename, file_content, "wb" if binary_mode else "w")

    @_translate_errors
    def append(self, filename, file_content, binary_mode=False):
        """Append string file contents to a file.

        Args:
            filename: string, a path
            file_content: string, the contents to append
            binary_mode: bool, write as binary if True, otherwise text
        """
        self._write(filename, file_content, "ab" if binary_mode else "a")

    def _write(self, filename, file_content, mode):
        fs, path = self._fs_path(filename)
        encoding = None if "b" in mode else "utf8"
        with fs.open(path, mode, encoding=encoding) as f:
            compatify = compat.as_bytes if "b" in mode else compat.as_text
            f.write(compatify(file_content))

    def _get_chain_protocol_prefix(self, filename):
        chain_prefix, chain_sep, last_path = filename.rpartition(
            self.CHAIN_SEPARATOR
        )
        protocol, sep, _ = last_path.rpartition(self.SEPARATOR)
        return chain_prefix + chain_sep + protocol + sep

    @_translate_errors
    def glob(self, filename):
        """Returns a list of files that match the given pattern(s)."""
        if isinstance(filename, bytes):
            filename = filename.decode("utf-8")

        fs, path = self._fs_path(filename)
        files = fs.glob(path)

        # Check if applying the original chaining is required.
        if (
            self.SEPARATOR not in filename
            and self.CHAIN_SEPARATOR not in filename
        ):
            return files

        prefix = self._get_chain_protocol_prefix(filename)

        return [
            (
                file
                if (self.SEPARATOR in file or self.CHAIN_SEPARATOR in file)
                else prefix + file
            )
            for file in files
        ]

    @_translate_errors
    def isdir(self, dirname):
        """Returns whether the path is a directory or not."""
        fs, path = self._fs_path(dirname)
        return fs.isdir(path)

    @_translate_errors
    def listdir(self, dirname):
        """Returns a list of entries contained within a directory."""
        fs, path = self._fs_path(dirname)
        files = fs.listdir(path, detail=False)
        files = [os.path.basename(fname) for fname in files]
        return files

    @_translate_errors
    def makedirs(self, dirname):
        """Creates a directory and all parent/intermediate directories."""
        fs, path = self._fs_path(dirname)
        return fs.makedirs(path, exist_ok=True)

    @_translate_errors
    def stat(self, filename):
        """Returns file statistics for a given path."""
        fs, path = self._fs_path(filename)
        return StatData(fs.size(path))


_FSSPEC_FILESYSTEM = FSSpecFileSystem()


def _get_fsspec_filesystem(filename):
    """Checks if the provided protocol is known to fsspec and, if so, returns
    the filesystem wrapper for it.
    """
    if not FSSPEC_ENABLED:
        return None

    segment = filename.partition(FSSpecFileSystem.CHAIN_SEPARATOR)[0]
    protocol = segment.partition(FSSpecFileSystem.SEPARATOR)[0]
    if fsspec.get_filesystem_class(protocol):
        return _FSSPEC_FILESYSTEM
    else:
        return None


register_filesystem("", LocalFileSystem())
if S3_ENABLED:
    register_filesystem("s3", S3FileSystem())


class GFile:
    # Only methods needed for TensorBoard are implemented.

    def __init__(self, filename, mode):
        if mode not in ("r", "rb", "br", "w", "wb", "bw"):
            raise NotImplementedError(
                "mode {} not supported by compat GFile".format(mode)
            )
        self.filename = compat.as_bytes(filename)
        self.fs = get_filesystem(self.filename)
        self.fs_supports_append = hasattr(self.fs, "append")
        self.buff = None
        # The buffer offset and the buffer chunk size are measured in the
        # natural units of the underlying stream, i.e. bytes for binary mode,
        # or characters in text mode.
        self.buff_chunk_size = _DEFAULT_BLOCK_SIZE
        self.buff_offset = 0
        self.continuation_token = None
        self.write_temp = None
        self.write_started = False
        self.binary_mode = "b" in mode
        self.write_mode = "w" in mode
        self.closed = False

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
        self.buff = None
        self.buff_offset = 0
        self.continuation_token = None

    def __iter__(self):
        return self

    def _read_buffer_to_offset(self, new_buff_offset):
        old_buff_offset = self.buff_offset
        read_size = min(len(self.buff), new_buff_offset) - old_buff_offset
        self.buff_offset += read_size
        return self.buff[old_buff_offset : old_buff_offset + read_size]

    def read(self, n=None):
        """Reads contents of the file to a string.

        Args:
            n: int, number of bytes or characters to read, otherwise
                read all the contents of the file

        Returns:
            Subset of the contents of the file as a string or bytes.
        """
        if self.write_mode:
            raise errors.PermissionDeniedError(
                None, None, "File not opened in read mode"
            )

        result = None
        if self.buff and len(self.buff) > self.buff_offset:
            # Read from the local buffer first.
            if n is not None:
                chunk = self._read_buffer_to_offset(self.buff_offset + n)
                if len(chunk) == n:
                    return chunk
                result = chunk
                n -= len(chunk)
            else:
                # Add the entire local buffer and update offsets.
                result = self._read_buffer_to_offset(len(self.buff))

        # Read from the filesystem.
        read_size = max(self.buff_chunk_size, n) if n is not None else None
        self.buff, self.continuation_token = self.fs.read(
            self.filename, self.binary_mode, read_size, self.continuation_token
        )
        self.buff_offset = 0

        # Add from the filesystem.
        if n is not None:
            chunk = self._read_buffer_to_offset(n)
        else:
            # Add the entire local buffer and update offsets.
            chunk = self._read_buffer_to_offset(len(self.buff))
        result = result + chunk if result else chunk

        return result

    def write(self, file_content):
        """Writes string file contents to the file, clearing the contents of
        the file on the first write and then appending on subsequent calls.

        Args:
            file_content: string, the contents
        """
        if not self.write_mode:
            raise errors.PermissionDeniedError(
                None, None, "File not opened in write mode"
            )
        if self.closed:
            raise errors.FailedPreconditionError(
                None, None, "File already closed"
            )

        if self.fs_supports_append:
            if not self.write_started:
                # Write the first chunk to truncate the file if it already exists.
                self.fs.write(self.filename, file_content, self.binary_mode)
                self.write_started = True

            else:
                # Append the later chunks.
                self.fs.append(self.filename, file_content, self.binary_mode)
        else:
            # Add to a temp file, but wait for flush to write to the final filesystem.
            if self.write_temp is None:
                mode = "w+b" if self.binary_mode else "w+"
                self.write_temp = tempfile.TemporaryFile(mode)

            compatify = compat.as_bytes if self.binary_mode else compat.as_text
            self.write_temp.write(compatify(file_content))

    def __next__(self):
        line = None
        while True:
            if not self.buff:
                # Read one unit into the buffer.
                line = self.read(1)
                if line and (line[-1] == "\n" or not self.buff):
                    return line
                if not self.buff:
                    raise StopIteration()
            else:
                index = self.buff.find("\n", self.buff_offset)
                if index != -1:
                    # Include the line up to and including the newline.
                    chunk = self.read(index + 1 - self.buff_offset)
                    line = line + chunk if line else chunk
                    return line

                # Read one unit past the end of the buffer.
                chunk = self.read(len(self.buff) + 1 - self.buff_offset)
                line = line + chunk if line else chunk
                if line and (line[-1] == "\n" or not self.buff):
                    return line
                if not self.buff:
                    raise StopIteration()

    def next(self):
        return self.__next__()

    def flush(self):
        if self.closed:
            raise errors.FailedPreconditionError(
                None, None, "File already closed"
            )

        if not self.fs_supports_append:
            if self.write_temp is not None:
                # Read the temp file from the beginning.
                self.write_temp.flush()
                self.write_temp.seek(0)
                chunk = self.write_temp.read()
                if chunk is not None:
                    # Write the full contents, keeping them in the temp file.
                    self.fs.write(self.filename, chunk, self.binary_mode)
                    self.write_temp.seek(len(chunk))

    def close(self):
        self.flush()
        if self.write_temp is not None:
            self.write_temp.close()
            self.write_temp = None
            self.write_started = False
        self.closed = True


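# Illustrative sketch (not part of the packaged module): typical use of the
# GFile class above. Writes go through `write`/`close` (buffered via a temp
# file on filesystems without append); text-mode reads support line
# iteration. The path is hypothetical.
def _example_gfile_round_trip(path="/tmp/example.txt"):
    with GFile(path, "w") as f:
        f.write("line 1\n")
        f.write("line 2\n")
    with GFile(path, "r") as f:
        return list(f)  # ["line 1\n", "line 2\n"]

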
def exists(filename):
    """Determines whether a path exists or not.

    Args:
        filename: string, a path

    Returns:
        True if the path exists, whether it's a file or a directory.
        False if the path does not exist and there are no filesystem errors.

    Raises:
        errors.OpError: Propagates any errors reported by the FileSystem API.
    """
    return get_filesystem(filename).exists(filename)


def glob(filename):
    """Returns a list of files that match the given pattern(s).

    Args:
        filename: string or iterable of strings. The glob pattern(s).

    Returns:
        A list of strings containing filenames that match the given pattern(s).

    Raises:
        errors.OpError: If there are filesystem / directory listing errors.
    """
    return get_filesystem(filename).glob(filename)


def isdir(dirname):
    """Returns whether the path is a directory or not.

    Args:
        dirname: string, path to a potential directory

    Returns:
        True, if the path is a directory; False otherwise
    """
    return get_filesystem(dirname).isdir(dirname)


def listdir(dirname):
    """Returns a list of entries contained within a directory.

    The list is in arbitrary order. It does not contain the special entries "."
    and "..".

    Args:
        dirname: string, path to a directory

    Returns:
        [filename1, filename2, ... filenameN] as strings

    Raises:
        errors.NotFoundError if the directory doesn't exist
    """
    return get_filesystem(dirname).listdir(dirname)


def makedirs(path):
    """Creates a directory and all parent/intermediate directories.

    It succeeds if path already exists and is writable.

    Args:
        path: string, name of the directory to be created
    """
    return get_filesystem(path).makedirs(path)


def walk(top, topdown=True, onerror=None):
    """Recursive directory tree generator for directories.

    Args:
        top: string, a directory name
        topdown: bool, traverse pre order if True, post order if False.
        onerror: optional handler for errors. Should be a function; it will be
            called with the error as its argument. Rethrowing the error aborts
            the walk. Errors that happen while listing directories are
            otherwise ignored.

    Yields:
        Each yield is a 3-tuple: the pathname of a directory, followed by lists
        of all its subdirectories and leaf files:
        (dirname, [subdirname, subdirname, ...], [filename, filename, ...])
        as strings.
    """
    top = compat.as_str_any(top)
    fs = get_filesystem(top)
    try:
        listing = listdir(top)
    except errors.NotFoundError as err:
        if onerror:
            onerror(err)
        # Skip this directory whether or not a handler was invoked.
        return

    files = []
    subdirs = []
    for item in listing:
        full_path = fs.join(top, compat.as_str_any(item))
        if isdir(full_path):
            subdirs.append(item)
        else:
            files.append(item)

    here = (top, subdirs, files)

    if topdown:
        yield here

    for subdir in subdirs:
        joined_subdir = fs.join(top, compat.as_str_any(subdir))
        for subitem in walk(joined_subdir, topdown, onerror=onerror):
            yield subitem

    if not topdown:
        yield here


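# Illustrative sketch (not part of the packaged module): using `walk` above to
# collect every file path under a log directory, the kind of traversal
# TensorBoard performs to discover event files. The directory name is
# hypothetical.
def _example_collect_files(logdir="/tmp/logs"):
    fs = get_filesystem(logdir)
    paths = []
    for dirname, _subdirs, filenames in walk(logdir):
        paths.extend(fs.join(dirname, fname) for fname in filenames)
    return paths

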
def stat(filename):
    """Returns file statistics for a given path.

    Args:
        filename: string, path to a file

    Returns:
        FileStatistics struct that contains information about the path

    Raises:
        errors.OpError: If the operation fails.
    """
    return get_filesystem(filename).stat(filename)


# Used for tests only
def _write_string_to_file(filename, file_content):
    """Writes a string to a given file.

    Args:
        filename: string, path to a file
        file_content: string, contents that need to be written to the file

    Raises:
        errors.OpError: If there are errors during the operation.
    """
    with GFile(filename, mode="w") as f:
        f.write(compat.as_text(file_content))


# Used for tests only
def _read_file_to_string(filename, binary_mode=False):
    """Reads the entire contents of a file to a string.

    Args:
        filename: string, path to a file
        binary_mode: whether to open the file in binary mode or not. This
            changes the type of the object returned.

    Returns:
        Contents of the file as a string or bytes.

    Raises:
        errors.OpError: Raises a variety of error subtypes, e.g.
            `NotFoundError`.
    """
    if binary_mode:
        f = GFile(filename, mode="rb")
    else:
        f = GFile(filename, mode="r")
    return f.read()