tensorbored 2.21.0rc1769983804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorbored/__init__.py +112 -0
- tensorbored/_vendor/__init__.py +0 -0
- tensorbored/_vendor/bleach/__init__.py +125 -0
- tensorbored/_vendor/bleach/_vendor/__init__.py +0 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/__init__.py +35 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_ihatexml.py +289 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_inputstream.py +918 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_tokenizer.py +1735 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/__init__.py +5 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/_base.py +40 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/py.py +67 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_utils.py +159 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/constants.py +2946 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/__init__.py +0 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/alphabeticalattributes.py +29 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/base.py +12 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/inject_meta_charset.py +73 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/lint.py +93 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/optionaltags.py +207 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/sanitizer.py +916 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/whitespace.py +38 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/html5parser.py +2795 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/serializer.py +409 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/__init__.py +30 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/genshi.py +54 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/sax.py +50 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/__init__.py +88 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/base.py +417 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/dom.py +239 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree.py +343 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree_lxml.py +392 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/__init__.py +154 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/base.py +252 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/dom.py +43 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree.py +131 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree_lxml.py +215 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/genshi.py +69 -0
- tensorbored/_vendor/bleach/_vendor/parse.py +1078 -0
- tensorbored/_vendor/bleach/callbacks.py +32 -0
- tensorbored/_vendor/bleach/html5lib_shim.py +757 -0
- tensorbored/_vendor/bleach/linkifier.py +633 -0
- tensorbored/_vendor/bleach/parse_shim.py +1 -0
- tensorbored/_vendor/bleach/sanitizer.py +638 -0
- tensorbored/_vendor/bleach/six_shim.py +19 -0
- tensorbored/_vendor/webencodings/__init__.py +342 -0
- tensorbored/_vendor/webencodings/labels.py +231 -0
- tensorbored/_vendor/webencodings/mklabels.py +59 -0
- tensorbored/_vendor/webencodings/x_user_defined.py +325 -0
- tensorbored/assets.py +36 -0
- tensorbored/auth.py +102 -0
- tensorbored/backend/__init__.py +0 -0
- tensorbored/backend/application.py +604 -0
- tensorbored/backend/auth_context_middleware.py +38 -0
- tensorbored/backend/client_feature_flags.py +113 -0
- tensorbored/backend/empty_path_redirect.py +46 -0
- tensorbored/backend/event_processing/__init__.py +0 -0
- tensorbored/backend/event_processing/data_ingester.py +276 -0
- tensorbored/backend/event_processing/data_provider.py +535 -0
- tensorbored/backend/event_processing/directory_loader.py +142 -0
- tensorbored/backend/event_processing/directory_watcher.py +272 -0
- tensorbored/backend/event_processing/event_accumulator.py +950 -0
- tensorbored/backend/event_processing/event_file_inspector.py +463 -0
- tensorbored/backend/event_processing/event_file_loader.py +292 -0
- tensorbored/backend/event_processing/event_multiplexer.py +521 -0
- tensorbored/backend/event_processing/event_util.py +68 -0
- tensorbored/backend/event_processing/io_wrapper.py +223 -0
- tensorbored/backend/event_processing/plugin_asset_util.py +104 -0
- tensorbored/backend/event_processing/plugin_event_accumulator.py +721 -0
- tensorbored/backend/event_processing/plugin_event_multiplexer.py +522 -0
- tensorbored/backend/event_processing/reservoir.py +266 -0
- tensorbored/backend/event_processing/tag_types.py +29 -0
- tensorbored/backend/experiment_id.py +71 -0
- tensorbored/backend/experimental_plugin.py +51 -0
- tensorbored/backend/http_util.py +263 -0
- tensorbored/backend/json_util.py +70 -0
- tensorbored/backend/path_prefix.py +67 -0
- tensorbored/backend/process_graph.py +74 -0
- tensorbored/backend/security_validator.py +202 -0
- tensorbored/compat/__init__.py +69 -0
- tensorbored/compat/proto/__init__.py +0 -0
- tensorbored/compat/proto/allocation_description_pb2.py +35 -0
- tensorbored/compat/proto/api_def_pb2.py +82 -0
- tensorbored/compat/proto/attr_value_pb2.py +80 -0
- tensorbored/compat/proto/cluster_pb2.py +58 -0
- tensorbored/compat/proto/config_pb2.py +271 -0
- tensorbored/compat/proto/coordination_config_pb2.py +45 -0
- tensorbored/compat/proto/cost_graph_pb2.py +87 -0
- tensorbored/compat/proto/cpp_shape_inference_pb2.py +70 -0
- tensorbored/compat/proto/debug_pb2.py +65 -0
- tensorbored/compat/proto/event_pb2.py +149 -0
- tensorbored/compat/proto/full_type_pb2.py +74 -0
- tensorbored/compat/proto/function_pb2.py +157 -0
- tensorbored/compat/proto/graph_debug_info_pb2.py +111 -0
- tensorbored/compat/proto/graph_pb2.py +41 -0
- tensorbored/compat/proto/histogram_pb2.py +39 -0
- tensorbored/compat/proto/meta_graph_pb2.py +254 -0
- tensorbored/compat/proto/node_def_pb2.py +61 -0
- tensorbored/compat/proto/op_def_pb2.py +81 -0
- tensorbored/compat/proto/resource_handle_pb2.py +48 -0
- tensorbored/compat/proto/rewriter_config_pb2.py +93 -0
- tensorbored/compat/proto/rpc_options_pb2.py +35 -0
- tensorbored/compat/proto/saved_object_graph_pb2.py +193 -0
- tensorbored/compat/proto/saver_pb2.py +38 -0
- tensorbored/compat/proto/step_stats_pb2.py +116 -0
- tensorbored/compat/proto/struct_pb2.py +144 -0
- tensorbored/compat/proto/summary_pb2.py +111 -0
- tensorbored/compat/proto/tensor_description_pb2.py +38 -0
- tensorbored/compat/proto/tensor_pb2.py +68 -0
- tensorbored/compat/proto/tensor_shape_pb2.py +46 -0
- tensorbored/compat/proto/tfprof_log_pb2.py +307 -0
- tensorbored/compat/proto/trackable_object_graph_pb2.py +90 -0
- tensorbored/compat/proto/types_pb2.py +105 -0
- tensorbored/compat/proto/variable_pb2.py +62 -0
- tensorbored/compat/proto/verifier_config_pb2.py +38 -0
- tensorbored/compat/proto/versions_pb2.py +35 -0
- tensorbored/compat/tensorflow_stub/__init__.py +38 -0
- tensorbored/compat/tensorflow_stub/app.py +124 -0
- tensorbored/compat/tensorflow_stub/compat/__init__.py +131 -0
- tensorbored/compat/tensorflow_stub/compat/v1/__init__.py +20 -0
- tensorbored/compat/tensorflow_stub/dtypes.py +692 -0
- tensorbored/compat/tensorflow_stub/error_codes.py +169 -0
- tensorbored/compat/tensorflow_stub/errors.py +507 -0
- tensorbored/compat/tensorflow_stub/flags.py +124 -0
- tensorbored/compat/tensorflow_stub/io/__init__.py +17 -0
- tensorbored/compat/tensorflow_stub/io/gfile.py +1011 -0
- tensorbored/compat/tensorflow_stub/pywrap_tensorflow.py +285 -0
- tensorbored/compat/tensorflow_stub/tensor_shape.py +1035 -0
- tensorbored/context.py +129 -0
- tensorbored/data/__init__.py +0 -0
- tensorbored/data/grpc_provider.py +365 -0
- tensorbored/data/ingester.py +46 -0
- tensorbored/data/proto/__init__.py +0 -0
- tensorbored/data/proto/data_provider_pb2.py +517 -0
- tensorbored/data/proto/data_provider_pb2_grpc.py +374 -0
- tensorbored/data/provider.py +1365 -0
- tensorbored/data/server_ingester.py +301 -0
- tensorbored/data_compat.py +159 -0
- tensorbored/dataclass_compat.py +224 -0
- tensorbored/default.py +124 -0
- tensorbored/errors.py +130 -0
- tensorbored/lazy.py +99 -0
- tensorbored/main.py +48 -0
- tensorbored/main_lib.py +62 -0
- tensorbored/manager.py +487 -0
- tensorbored/notebook.py +441 -0
- tensorbored/plugin_util.py +266 -0
- tensorbored/plugins/__init__.py +0 -0
- tensorbored/plugins/audio/__init__.py +0 -0
- tensorbored/plugins/audio/audio_plugin.py +229 -0
- tensorbored/plugins/audio/metadata.py +69 -0
- tensorbored/plugins/audio/plugin_data_pb2.py +37 -0
- tensorbored/plugins/audio/summary.py +230 -0
- tensorbored/plugins/audio/summary_v2.py +124 -0
- tensorbored/plugins/base_plugin.py +367 -0
- tensorbored/plugins/core/__init__.py +0 -0
- tensorbored/plugins/core/core_plugin.py +981 -0
- tensorbored/plugins/custom_scalar/__init__.py +0 -0
- tensorbored/plugins/custom_scalar/custom_scalars_plugin.py +320 -0
- tensorbored/plugins/custom_scalar/layout_pb2.py +85 -0
- tensorbored/plugins/custom_scalar/metadata.py +35 -0
- tensorbored/plugins/custom_scalar/summary.py +79 -0
- tensorbored/plugins/debugger_v2/__init__.py +0 -0
- tensorbored/plugins/debugger_v2/debug_data_multiplexer.py +631 -0
- tensorbored/plugins/debugger_v2/debug_data_provider.py +634 -0
- tensorbored/plugins/debugger_v2/debugger_v2_plugin.py +504 -0
- tensorbored/plugins/distribution/__init__.py +0 -0
- tensorbored/plugins/distribution/compressor.py +158 -0
- tensorbored/plugins/distribution/distributions_plugin.py +116 -0
- tensorbored/plugins/distribution/metadata.py +19 -0
- tensorbored/plugins/graph/__init__.py +0 -0
- tensorbored/plugins/graph/graph_util.py +129 -0
- tensorbored/plugins/graph/graphs_plugin.py +336 -0
- tensorbored/plugins/graph/keras_util.py +328 -0
- tensorbored/plugins/graph/metadata.py +42 -0
- tensorbored/plugins/histogram/__init__.py +0 -0
- tensorbored/plugins/histogram/histograms_plugin.py +144 -0
- tensorbored/plugins/histogram/metadata.py +63 -0
- tensorbored/plugins/histogram/plugin_data_pb2.py +34 -0
- tensorbored/plugins/histogram/summary.py +234 -0
- tensorbored/plugins/histogram/summary_v2.py +292 -0
- tensorbored/plugins/hparams/__init__.py +14 -0
- tensorbored/plugins/hparams/_keras.py +93 -0
- tensorbored/plugins/hparams/api.py +130 -0
- tensorbored/plugins/hparams/api_pb2.py +208 -0
- tensorbored/plugins/hparams/backend_context.py +606 -0
- tensorbored/plugins/hparams/download_data.py +158 -0
- tensorbored/plugins/hparams/error.py +26 -0
- tensorbored/plugins/hparams/get_experiment.py +71 -0
- tensorbored/plugins/hparams/hparams_plugin.py +206 -0
- tensorbored/plugins/hparams/hparams_util_pb2.py +69 -0
- tensorbored/plugins/hparams/json_format_compat.py +38 -0
- tensorbored/plugins/hparams/list_metric_evals.py +57 -0
- tensorbored/plugins/hparams/list_session_groups.py +1040 -0
- tensorbored/plugins/hparams/metadata.py +125 -0
- tensorbored/plugins/hparams/metrics.py +41 -0
- tensorbored/plugins/hparams/plugin_data_pb2.py +69 -0
- tensorbored/plugins/hparams/summary.py +205 -0
- tensorbored/plugins/hparams/summary_v2.py +597 -0
- tensorbored/plugins/image/__init__.py +0 -0
- tensorbored/plugins/image/images_plugin.py +232 -0
- tensorbored/plugins/image/metadata.py +65 -0
- tensorbored/plugins/image/plugin_data_pb2.py +34 -0
- tensorbored/plugins/image/summary.py +159 -0
- tensorbored/plugins/image/summary_v2.py +130 -0
- tensorbored/plugins/mesh/__init__.py +14 -0
- tensorbored/plugins/mesh/mesh_plugin.py +292 -0
- tensorbored/plugins/mesh/metadata.py +152 -0
- tensorbored/plugins/mesh/plugin_data_pb2.py +37 -0
- tensorbored/plugins/mesh/summary.py +251 -0
- tensorbored/plugins/mesh/summary_v2.py +214 -0
- tensorbored/plugins/metrics/__init__.py +0 -0
- tensorbored/plugins/metrics/metadata.py +17 -0
- tensorbored/plugins/metrics/metrics_plugin.py +623 -0
- tensorbored/plugins/pr_curve/__init__.py +0 -0
- tensorbored/plugins/pr_curve/metadata.py +75 -0
- tensorbored/plugins/pr_curve/plugin_data_pb2.py +34 -0
- tensorbored/plugins/pr_curve/pr_curves_plugin.py +241 -0
- tensorbored/plugins/pr_curve/summary.py +574 -0
- tensorbored/plugins/profile_redirect/__init__.py +0 -0
- tensorbored/plugins/profile_redirect/profile_redirect_plugin.py +49 -0
- tensorbored/plugins/projector/__init__.py +67 -0
- tensorbored/plugins/projector/metadata.py +26 -0
- tensorbored/plugins/projector/projector_config_pb2.py +54 -0
- tensorbored/plugins/projector/projector_plugin.py +795 -0
- tensorbored/plugins/projector/tf_projector_plugin/index.js +32 -0
- tensorbored/plugins/projector/tf_projector_plugin/projector_binary.html +524 -0
- tensorbored/plugins/projector/tf_projector_plugin/projector_binary.js +15536 -0
- tensorbored/plugins/scalar/__init__.py +0 -0
- tensorbored/plugins/scalar/metadata.py +60 -0
- tensorbored/plugins/scalar/plugin_data_pb2.py +34 -0
- tensorbored/plugins/scalar/scalars_plugin.py +181 -0
- tensorbored/plugins/scalar/summary.py +109 -0
- tensorbored/plugins/scalar/summary_v2.py +124 -0
- tensorbored/plugins/text/__init__.py +0 -0
- tensorbored/plugins/text/metadata.py +62 -0
- tensorbored/plugins/text/plugin_data_pb2.py +34 -0
- tensorbored/plugins/text/summary.py +114 -0
- tensorbored/plugins/text/summary_v2.py +124 -0
- tensorbored/plugins/text/text_plugin.py +288 -0
- tensorbored/plugins/wit_redirect/__init__.py +0 -0
- tensorbored/plugins/wit_redirect/wit_redirect_plugin.py +49 -0
- tensorbored/program.py +910 -0
- tensorbored/summary/__init__.py +35 -0
- tensorbored/summary/_output.py +124 -0
- tensorbored/summary/_tf/__init__.py +14 -0
- tensorbored/summary/_tf/summary/__init__.py +178 -0
- tensorbored/summary/_writer.py +105 -0
- tensorbored/summary/v1.py +51 -0
- tensorbored/summary/v2.py +25 -0
- tensorbored/summary/writer/__init__.py +13 -0
- tensorbored/summary/writer/event_file_writer.py +291 -0
- tensorbored/summary/writer/record_writer.py +50 -0
- tensorbored/util/__init__.py +0 -0
- tensorbored/util/encoder.py +116 -0
- tensorbored/util/grpc_util.py +311 -0
- tensorbored/util/img_mime_type_detector.py +40 -0
- tensorbored/util/io_util.py +20 -0
- tensorbored/util/lazy_tensor_creator.py +110 -0
- tensorbored/util/op_evaluator.py +104 -0
- tensorbored/util/platform_util.py +20 -0
- tensorbored/util/tb_logging.py +24 -0
- tensorbored/util/tensor_util.py +617 -0
- tensorbored/util/timing.py +122 -0
- tensorbored/version.py +21 -0
- tensorbored/webfiles.zip +0 -0
- tensorbored-2.21.0rc1769983804.dist-info/METADATA +49 -0
- tensorbored-2.21.0rc1769983804.dist-info/RECORD +271 -0
- tensorbored-2.21.0rc1769983804.dist-info/WHEEL +5 -0
- tensorbored-2.21.0rc1769983804.dist-info/entry_points.txt +6 -0
- tensorbored-2.21.0rc1769983804.dist-info/licenses/LICENSE +739 -0
- tensorbored-2.21.0rc1769983804.dist-info/top_level.txt +1 -0
tensorbored/__init__.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""TensorBoard is a webapp for understanding TensorFlow runs and graphs."""
|
|
16
|
+
|
|
17
|
+
from tensorbored import lazy as _lazy
|
|
18
|
+
from tensorbored import version as _version
|
|
19
|
+
|
|
20
|
+
# TensorBoard public API.
|
|
21
|
+
__all__ = [
|
|
22
|
+
"__version__",
|
|
23
|
+
"errors",
|
|
24
|
+
"notebook",
|
|
25
|
+
"program",
|
|
26
|
+
"summary",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Please be careful when changing the structure of this file.
|
|
31
|
+
#
|
|
32
|
+
# The lazy imports in this file must use `importlib.import_module`, not
|
|
33
|
+
# `import tensorbored.foo` or `from tensorbored import foo`, or it will
|
|
34
|
+
# be impossible to reload the TensorBored module without breaking these
|
|
35
|
+
# top-level public APIs. This has to do with the gory details of
|
|
36
|
+
# Python's module system. Take `tensorbored.notebook` as an example:
|
|
37
|
+
#
|
|
38
|
+
# - When the `tensorbored` module (that's us!) is initialized, its
|
|
39
|
+
# `notebook` attribute is initialized to a new LazyModule. The
|
|
40
|
+
# actual `tensorbored.notebook` submodule is not loaded.
|
|
41
|
+
#
|
|
42
|
+
# - When the `tensorbored.notebook` submodule is first loaded, Python
|
|
43
|
+
# _reassigns_ the `notebook` attribute on the `tensorbored` module
|
|
44
|
+
# object to point to the underlying `tensorbored.notebook` module
|
|
45
|
+
# object, rather than its former LazyModule value. This occurs
|
|
46
|
+
# whether the module is loaded via the lazy module or directly as an
|
|
47
|
+
# import:
|
|
48
|
+
#
|
|
49
|
+
# - import tensorbored; tensorbored.notebook.start(...) # one way
|
|
50
|
+
# - from tensorbored import notebook # other way; same effect
|
|
51
|
+
#
|
|
52
|
+
# - When the `tensorbored` module is reloaded, its `notebook`
|
|
53
|
+
# attribute is once again bound to a (new) LazyModule, while the
|
|
54
|
+
# `tensorbored.notebook` module object is unaffected and still
|
|
55
|
+
# exists in `sys.modules`. But then...
|
|
56
|
+
#
|
|
57
|
+
# - When the new LazyModule is forced, it must resolve to the existing
|
|
58
|
+
# `tensorbored.notebook` module object rather than itself (which
|
|
59
|
+
# just creates a stack overflow). If the LazyModule load function
|
|
60
|
+
# uses `import tensorbored.notebook; return tensorbored.notebook`,
|
|
61
|
+
# then the first statement will do _nothing_ because the
|
|
62
|
+
# `tensorbored.notebook` module is already loaded, and the second
|
|
63
|
+
# statement will return the LazyModule itself. The same goes for the
|
|
64
|
+
# `from tensorbored import notebook` form. We need to ensure that
|
|
65
|
+
# the submodule is loaded and then pull the actual module object out
|
|
66
|
+
# of `sys.modules`... which is exactly what `importlib` handles for
|
|
67
|
+
# us.
|
|
68
|
+
#
|
|
69
|
+
# See <https://github.com/tensorflow/tensorboard/issues/1989> for
|
|
70
|
+
# additional discussion.
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@_lazy.lazy_load("tensorbored.errors")
def errors():
    """Load and return the real `tensorbored.errors` submodule."""
    # Resolved via importlib (not an `import tensorbored.errors` statement)
    # so the actual module object is returned rather than this lazy
    # placeholder; see the explanatory comment earlier in this file.
    from importlib import import_module

    return import_module("tensorbored.errors")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@_lazy.lazy_load("tensorbored.notebook")
def notebook():
    """Load and return the real `tensorbored.notebook` submodule."""
    # Resolved via importlib (not an `import tensorbored.notebook` statement)
    # so the actual module object is returned rather than this lazy
    # placeholder; see the explanatory comment earlier in this file.
    from importlib import import_module

    return import_module("tensorbored.notebook")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@_lazy.lazy_load("tensorbored.program")
def program():
    """Load and return the real `tensorbored.program` submodule."""
    # Resolved via importlib (not an `import tensorbored.program` statement)
    # so the actual module object is returned rather than this lazy
    # placeholder; see the explanatory comment earlier in this file.
    from importlib import import_module

    return import_module("tensorbored.program")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@_lazy.lazy_load("tensorbored.summary")
def summary():
    """Load and return the real `tensorbored.summary` submodule."""
    # Resolved via importlib (not an `import tensorbored.summary` statement)
    # so the actual module object is returned rather than this lazy
    # placeholder; see the explanatory comment earlier in this file.
    from importlib import import_module

    return import_module("tensorbored.summary")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def load_ipython_extension(ipython):
    """Entry point used by IPython to load this package as an extension.

    This is meant to be invoked by the IPython runtime only, not by user
    code. The call is forwarded unchanged to
    `notebook._load_ipython_extension`.

    See:
      https://ipython.readthedocs.io/en/stable/config/extensions/index.html

    Args:
      ipython: The object supplied by the IPython runtime; passed through
        as-is.
    """
    notebook._load_ipython_extension(ipython)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
__version__ = _version.VERSION
|
|
File without changes
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from tensorbored._vendor.bleach.linkifier import (
|
|
2
|
+
DEFAULT_CALLBACKS,
|
|
3
|
+
Linker,
|
|
4
|
+
)
|
|
5
|
+
from tensorbored._vendor.bleach.sanitizer import (
|
|
6
|
+
ALLOWED_ATTRIBUTES,
|
|
7
|
+
ALLOWED_PROTOCOLS,
|
|
8
|
+
ALLOWED_TAGS,
|
|
9
|
+
Cleaner,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# yyyymmdd
|
|
14
|
+
__releasedate__ = "20241029"
|
|
15
|
+
# x.y.z or x.y.z.dev0 -- semver
|
|
16
|
+
__version__ = "6.2.0"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
__all__ = ["clean", "linkify"]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def clean(
    text,
    tags=ALLOWED_TAGS,
    attributes=ALLOWED_ATTRIBUTES,
    protocols=ALLOWED_PROTOCOLS,
    strip=False,
    strip_comments=True,
    css_sanitizer=None,
):
    """Sanitize an HTML fragment and return the cleaned text

    This is a security-focused helper: its only job is to remove malicious
    content from a string so that the result can be shown as content in a
    web page. It is not intended for preparing content for use outside of a
    web page.

    Example::

        import bleach

        better_text = bleach.clean(yucky_text)


    .. Note::

       Each call builds a new :py:class:`bleach.sanitizer.Cleaner`. If you
       clean many strings with the same argument values, or need more
       configurability, create a ``Cleaner`` instance yourself and reuse it.

    :arg str text: the text to clean

    :arg set tags: set of allowed tags; defaults to
        ``bleach.sanitizer.ALLOWED_TAGS``

    :arg dict attributes: allowed attributes; can be a callable, list or dict;
        defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

    :arg list protocols: allowed list of protocols for links; defaults
        to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

    :arg bool strip: whether or not to strip disallowed elements

    :arg bool strip_comments: whether or not to strip HTML comments

    :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
        sanitizing style attribute values and style text; defaults to None

    :returns: cleaned text as unicode

    """
    # Every option is forwarded verbatim to the Cleaner constructor.
    options = dict(
        tags=tags,
        attributes=attributes,
        protocols=protocols,
        strip=strip,
        strip_comments=strip_comments,
        css_sanitizer=css_sanitizer,
    )
    return Cleaner(**options).clean(text)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
    """Turn URL-like strings in an HTML fragment into links

    Strings in ``text`` (which may be an HTML fragment) that look like URLs,
    domain names and email addresses are converted to links, while the
    following are preserved:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    The conversion is best-effort and tries to recover from bad situations
    caused by malformed text.

    .. Note::

       Each call builds a new :py:class:`bleach.linkifier.Linker`. If you
       linkify many strings with the same argument values, or need more
       configurability, create a ``Linker`` instance yourself and reuse it.

    .. Note::

       To clean text and then linkify it, consider using
       :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
       pass, so the HTML is only parsed once.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag attributes;
        defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags that you don't want to linkify the
        contents of; for example, you could set this to ``['pre']`` to skip
        linkifying contents of ``pre`` tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    # Every option is forwarded verbatim to the Linker constructor.
    options = dict(callbacks=callbacks, skip_tags=skip_tags, parse_email=parse_email)
    return Linker(**options).linkify(text)
|
|
File without changes
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""
|
|
2
|
+
HTML parsing library based on the `WHATWG HTML specification
|
|
3
|
+
<https://whatwg.org/html>`_. The parser is designed to be compatible with
|
|
4
|
+
existing HTML found in the wild and implements well-defined error recovery that
|
|
5
|
+
is largely compatible with modern desktop web browsers.
|
|
6
|
+
|
|
7
|
+
Example usage::
|
|
8
|
+
|
|
9
|
+
import html5lib
|
|
10
|
+
with open("my_document.html", "rb") as f:
|
|
11
|
+
tree = html5lib.parse(f)
|
|
12
|
+
|
|
13
|
+
For convenience, this module re-exports the following names:
|
|
14
|
+
|
|
15
|
+
* :func:`~.html5parser.parse`
|
|
16
|
+
* :func:`~.html5parser.parseFragment`
|
|
17
|
+
* :class:`~.html5parser.HTMLParser`
|
|
18
|
+
* :func:`~.treebuilders.getTreeBuilder`
|
|
19
|
+
* :func:`~.treewalkers.getTreeWalker`
|
|
20
|
+
* :func:`~.serializer.serialize`
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import absolute_import, division, unicode_literals
|
|
24
|
+
|
|
25
|
+
from .html5parser import HTMLParser, parse, parseFragment
|
|
26
|
+
from .treebuilders import getTreeBuilder
|
|
27
|
+
from .treewalkers import getTreeWalker
|
|
28
|
+
from .serializer import serialize
|
|
29
|
+
|
|
30
|
+
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
|
31
|
+
"getTreeWalker", "serialize"]
|
|
32
|
+
|
|
33
|
+
# this has to be at the top level, see how setup.py parses this
|
|
34
|
+
#: Distribution version number.
|
|
35
|
+
__version__ = "1.1"
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
from __future__ import absolute_import, division, unicode_literals
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import warnings
|
|
5
|
+
|
|
6
|
+
from .constants import DataLossWarning
|
|
7
|
+
|
|
8
|
+
baseChar = """
|
|
9
|
+
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
|
|
10
|
+
[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
|
|
11
|
+
[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
|
|
12
|
+
[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
|
|
13
|
+
[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
|
|
14
|
+
[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
|
|
15
|
+
[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
|
|
16
|
+
[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
|
|
17
|
+
[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
|
|
18
|
+
[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
|
|
19
|
+
[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
|
|
20
|
+
[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
|
|
21
|
+
[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
|
|
22
|
+
[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
|
|
23
|
+
[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
|
|
24
|
+
[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
|
|
25
|
+
[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
|
|
26
|
+
[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
|
|
27
|
+
[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
|
|
28
|
+
[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
|
|
29
|
+
[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
|
|
30
|
+
[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
|
|
31
|
+
[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
|
|
32
|
+
[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
|
|
33
|
+
[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
|
|
34
|
+
[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
|
|
35
|
+
[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
|
|
36
|
+
[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
|
|
37
|
+
[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
|
|
38
|
+
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
|
|
39
|
+
#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
|
|
40
|
+
#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
|
|
41
|
+
#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
|
|
42
|
+
[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
|
|
43
|
+
[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
|
|
44
|
+
#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
|
|
45
|
+
[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
|
|
46
|
+
[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
|
|
47
|
+
[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
|
|
48
|
+
[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
|
|
49
|
+
[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
|
|
50
|
+
#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
|
|
51
|
+
[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
|
|
52
|
+
[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
|
|
53
|
+
[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
|
|
54
|
+
[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
|
|
55
|
+
|
|
56
|
+
ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
|
|
57
|
+
|
|
58
|
+
# Code points accepted as combining characters inside a name, written as
# "#xNNNN" single code points and "[#xNNNN-#xNNNN]" inclusive ranges joined
# by "|" (the notation charStringToList parses).
# NOTE(review): appears to mirror the XML 1.0 Appendix B `CombiningChar`
# production -- confirm against the spec.
combiningCharacter = """
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
#x3099 | #x309A"""
|
|
81
|
+
|
|
82
|
+
# Decimal-digit code points of several scripts, as "[#xNNNN-#xNNNN]" inclusive
# ranges joined by "|" (the notation charStringToList parses).
# NOTE(review): appears to mirror the XML 1.0 Appendix B `Digit` production --
# confirm against the spec.
digit = """
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
|
|
87
|
+
|
|
88
|
+
# Extender code points allowed inside a name, in the "#xNNNN" /
# "[#xNNNN-#xNNNN]" notation that charStringToList parses.
# The range U+3031-U+3035 used to be written "#[#x3031-#x3035]"; the stray
# leading "#" matched neither reChar nor reCharRange, so the table could not be
# parsed. The precomputed regexps in this module already treat those code
# points as name characters, so the corrected spelling agrees with them.
extender = """
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
|
|
91
|
+
|
|
92
|
+
# A letter is any base character or any ideograph.
letter = " | ".join((baseChar, ideographic))

# Every character allowed after the first character of a name.
# NOTE(review): the original comment here was truncated ("Without the");
# presumably it referred to ":", which XML 1.0's NameChar production allows
# but which is absent from this list -- confirm.
name = " | ".join(
    (letter, digit, ".", "-", "_", combiningCharacter, extender)
)

# Every character allowed as the first character of a name.
nameFirst = " | ".join((letter, "_"))
|
|
98
|
+
|
|
99
|
+
# Matches one code point written as "#xNNNN" (exactly four upper-case hex
# digits) and captures the digits.
# The class used to be "[\d|A-F]", which also accepted a literal "|" as if it
# were a hex digit; "|" has no alternation meaning inside a character class.
reChar = re.compile(r"#x([0-9A-F]{4})")

# Matches an inclusive range written as "[#xNNNN-#xNNNN]" and captures both
# end points.
reCharRange = re.compile(r"\[#x([0-9A-F]{4})-#x([0-9A-F]{4})\]")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def charStringToList(chars):
    """Parse a "|"-separated character table into a normalised range list.

    Each item of ``chars`` is a ``#xNNNN`` code point, a ``[#xNNNN-#xNNNN]``
    inclusive range, or a single literal character.

    Items are split on "|" with any surrounding whitespace ignored. The tables
    in this module end their lines with "|" followed by a newline, so
    splitting on the exact string " | " left two items glued together and the
    second one was silently dropped.

    Returns the result of ``normaliseCharList`` applied to the parsed
    ``[first, last]`` pairs.

    Raises AssertionError if an item is neither of the two notations nor a
    single character.
    """
    rv = []
    for rawItem in chars.split("|"):
        item = rawItem.strip()
        for regexp in (reChar, reCharRange):
            # fullmatch, not match: trailing junk after a valid prefix must
            # not be ignored.
            match = regexp.fullmatch(item)
            if match is not None:
                bounds = [hexToInt(group) for group in match.groups()]
                if len(bounds) == 1:
                    # A single code point is stored as a one-element range.
                    bounds = bounds * 2
                rv.append(bounds)
                break
        else:
            # Not in "#x" notation: must be one literal character.
            assert len(item) == 1, "unparseable character item: %r" % item
            rv.append([ord(item)] * 2)
    return normaliseCharList(rv)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def normaliseCharList(charList):
    """Return ``charList`` sorted, with overlapping or adjacent ranges merged.

    ``charList`` is an iterable of ``[first, last]`` inclusive ranges. The
    result is a new list of new ``[first, last]`` lists; the caller's ranges
    are left untouched (previously the merged end point was written back into
    the caller's inner lists).

    When a range is fully contained in the previous one, the previous end
    point is kept. The old code overwrote it with the contained range's
    smaller end, shrinking the merged range.

    Raises AssertionError if any range has ``last < first``.
    """
    ordered = sorted(charList)
    for item in ordered:
        assert item[1] >= item[0]
    rv = []
    for item in ordered:
        first, last = item[0], item[1]
        # "+ 1" merges ranges that merely touch, e.g. [1, 3] and [4, 6].
        if rv and first <= rv[-1][1] + 1:
            rv[-1][1] = max(rv[-1][1], last)
        else:
            rv.append([first, last])
    return rv
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# We don't really support characters above the BMP :(
max_unicode = int("FFFF", 16)


def missingRanges(charList):
    """Return the ranges of ``0..max_unicode`` not covered by ``charList``.

    ``charList`` is expected to be sorted with no overlapping or touching
    ranges (the shape ``normaliseCharList`` returns). Each element is a
    ``[first, last]`` inclusive range.

    An empty ``charList`` yields the single range ``[0, max_unicode]``.

    The leading-gap test used to compare the first *range* with 0
    (``charList[0] != 0``), which is always true, so a list starting at code
    point 0 produced a bogus ``[0, -1]`` entry.
    """
    if not charList:
        return [[0, max_unicode]]
    rv = []
    lowest = charList[0][0]
    if lowest != 0:
        rv.append([0, lowest - 1])
    # Gap between each range and its successor.
    for current, following in zip(charList, charList[1:]):
        rv.append([current[1] + 1, following[0] - 1])
    highest = charList[-1][1]
    if highest != max_unicode:
        rv.append([highest + 1, max_unicode])
    return rv
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def listToRegexpStr(charList):
    """Render a list of ``[first, last]`` ranges as a regexp character class.

    A range whose two ends are equal becomes one escaped character; any other
    range becomes ``first-last`` with both ends escaped. The pieces are
    concatenated and wrapped in square brackets.
    """

    def render(item):
        if item[0] == item[1]:
            return escapeRegexp(chr(item[0]))
        return escapeRegexp(chr(item[0])) + "-" + escapeRegexp(chr(item[1]))

    return "[%s]" % "".join([render(item) for item in charList])
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def hexToInt(hex_str):
    """Return the integer value of the hexadecimal string ``hex_str``."""
    value = int(hex_str, base=16)
    return value
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def escapeRegexp(string):
    """Return ``string`` with regexp metacharacters backslash-escaped.

    Escaped characters: ``\\ . ^ $ * + ? { } [ ] | ( ) -``.

    The backslash itself is now escaped as well; previously it was emitted
    raw and would have combined with whatever followed it in the generated
    pattern. The escaping is done in a single pass so that the backslashes
    inserted for one character are never re-escaped.
    """
    specialCharacters = ("\\", ".", "^", "$", "*", "+", "?", "{", "}",
                         "[", "]", "|", "(", ")", "-")
    return "".join(
        "\\" + char if char in specialCharacters else char
        for char in string
    )
|
|
177
|
+
|
|
178
|
+
# Precomputed character classes, produced with the helper functions above
|
|
179
|
+
# A single character class covering characters up to U+FFFF. InfosetFilter.toXmlName
# runs findall() with it over everything after the first character of a name
# and escapes every character it finds.
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
|
180
|
+
|
|
181
|
+
# A single character class covering characters up to U+FFFF. InfosetFilter.toXmlName
# matches it against the first character of a name and escapes that character
# when it matches.
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
|
182
|
+
|
|
183
|
+
# Simpler things
|
|
184
|
+
# Matches any single character outside the accepted public-identifier set
# spelled out below; InfosetFilter.coercePubid escapes every match.
# NOTE(review): the set looks like XML 1.0's `PubidChar` production -- confirm.
nonPubidCharRegexp = re.compile(
    "[^\x20\x0D\x0A"  # space, carriage return, line feed
    "a-zA-Z0-9"  # ASCII letters and digits
    "\\-'()+,./:=?;!*#@$_%]"  # permitted punctuation
)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class InfosetFilter(object):
    """Coerce names, comments, text and public ids into XML-safe forms.

    Characters that are not acceptable are replaced by an escape of the form
    ``UXXXXX`` (``U`` followed by the code point as at least five upper-case
    hex digits). A ``DataLossWarning`` is issued whenever a value is altered
    or dropped.

    Constructor flags:
        dropXmlnsLocalName: drop attributes whose name starts with "xmlns:".
        dropXmlnsAttrNs: drop attributes in the
            "http://www.w3.org/2000/xmlns/" namespace.
        preventDoubleDashComments: break up "--" inside comments (and pad a
            trailing "-").
        preventDashAtCommentEnd: pad a trailing "-" in comments.
        replaceFormFeedCharacters: replace U+000C in text with a space.
        preventSingleQuotePubid: escape "'" in public identifiers.
    """

    # Matches the escapes written by escapeChar: "U" plus five hex digits.
    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")

    def __init__(self,
                 dropXmlnsLocalName=False,
                 dropXmlnsAttrNs=False,
                 preventDoubleDashComments=False,
                 preventDashAtCommentEnd=False,
                 replaceFormFeedCharacters=True,
                 preventSingleQuotePubid=False):

        self.dropXmlnsLocalName = dropXmlnsLocalName
        self.dropXmlnsAttrNs = dropXmlnsAttrNs

        self.preventDoubleDashComments = preventDoubleDashComments
        self.preventDashAtCommentEnd = preventDashAtCommentEnd

        self.replaceFormFeedCharacters = replaceFormFeedCharacters

        self.preventSingleQuotePubid = preventSingleQuotePubid

        # Maps an already-escaped character to its "UXXXXX" replacement.
        self.replaceCache = {}

    def coerceAttribute(self, name, namespace=None):
        """Return an XML-safe attribute name, or None if it must be dropped."""
        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
            warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
            return None
        elif (self.dropXmlnsAttrNs and
              namespace == "http://www.w3.org/2000/xmlns/"):
            warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
            return None
        else:
            return self.toXmlName(name)

    def coerceElement(self, name):
        """Return an XML-safe element name."""
        return self.toXmlName(name)

    def coerceComment(self, data):
        """Return comment text adjusted according to the comment flags.

        With ``preventDoubleDashComments`` every "--" is rewritten to "- -"
        until none remains. A trailing "-" is padded with a space when either
        ``preventDoubleDashComments`` or ``preventDashAtCommentEnd`` is set;
        the latter flag used to be stored but never consulted.
        """
        if self.preventDoubleDashComments:
            while "--" in data:
                warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                data = data.replace("--", "- -")
        if ((self.preventDoubleDashComments or self.preventDashAtCommentEnd) and
                data.endswith("-")):
            warnings.warn("Comments cannot end in a dash", DataLossWarning)
            data += " "
        return data

    def coerceCharacters(self, data):
        """Return text with form feeds replaced by spaces (if enabled)."""
        if self.replaceFormFeedCharacters:
            # One warning per form feed found.
            for _ in range(data.count("\x0C")):
                warnings.warn("Text cannot contain U+000C", DataLossWarning)
            data = data.replace("\x0C", " ")
        # Other non-xml characters
        return data

    def coercePubid(self, data):
        """Return a public identifier with disallowed characters escaped."""
        dataOutput = data
        for char in nonPubidCharRegexp.findall(data):
            warnings.warn("Coercing non-XML pubid", DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            dataOutput = dataOutput.replace(char, replacement)
        if self.preventSingleQuotePubid and "'" in dataOutput:
            warnings.warn("Pubid cannot contain single quote", DataLossWarning)
            dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
        return dataOutput

    def toXmlName(self, name):
        """Return ``name`` with characters invalid in an XML name escaped.

        The first character is checked against ``nonXmlNameFirstBMPRegexp``
        and the remainder against ``nonXmlNameBMPRegexp``. An empty name is
        returned unchanged (it used to raise IndexError).
        """
        if not name:
            return name

        nameFirst = name[0]
        nameRest = name[1:]
        if nonXmlNameFirstBMPRegexp.match(nameFirst):
            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
            nameFirstOutput = self.getReplacementCharacter(nameFirst)
        else:
            nameFirstOutput = nameFirst

        nameRestOutput = nameRest
        # A set, so each offending character is warned about and replaced once.
        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
        for char in replaceChars:
            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            nameRestOutput = nameRestOutput.replace(char, replacement)
        return nameFirstOutput + nameRestOutput

    def getReplacementCharacter(self, char):
        """Return the escape for ``char``, using the cache when possible."""
        try:
            return self.replaceCache[char]
        except KeyError:
            return self.escapeChar(char)

    def fromXmlName(self, name):
        """Return ``name`` with every ``UXXXXX`` escape turned back into its character."""
        for item in set(self.replacementRegexp.findall(name)):
            name = name.replace(item, self.unescapeChar(item))
        return name

    def escapeChar(self, char):
        """Return the ``UXXXXX`` escape for ``char`` and remember it in the cache."""
        # NOTE: code points above U+FFFFF format to six hex digits, which
        # replacementRegexp (exactly five) will not match in full.
        replacement = "U%05X" % ord(char)
        self.replaceCache[char] = replacement
        return replacement

    def unescapeChar(self, charcode):
        """Return the character for an escape such as ``"U0003A"``."""
        return chr(int(charcode[1:], 16))
|