tensorbored-2.21.0rc1769983804-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorbored/__init__.py +112 -0
- tensorbored/_vendor/__init__.py +0 -0
- tensorbored/_vendor/bleach/__init__.py +125 -0
- tensorbored/_vendor/bleach/_vendor/__init__.py +0 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/__init__.py +35 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_ihatexml.py +289 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_inputstream.py +918 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_tokenizer.py +1735 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/__init__.py +5 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/_base.py +40 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/py.py +67 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_utils.py +159 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/constants.py +2946 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/__init__.py +0 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/alphabeticalattributes.py +29 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/base.py +12 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/inject_meta_charset.py +73 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/lint.py +93 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/optionaltags.py +207 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/sanitizer.py +916 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/whitespace.py +38 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/html5parser.py +2795 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/serializer.py +409 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/__init__.py +30 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/genshi.py +54 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/sax.py +50 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/__init__.py +88 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/base.py +417 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/dom.py +239 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree.py +343 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree_lxml.py +392 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/__init__.py +154 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/base.py +252 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/dom.py +43 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree.py +131 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree_lxml.py +215 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/genshi.py +69 -0
- tensorbored/_vendor/bleach/_vendor/parse.py +1078 -0
- tensorbored/_vendor/bleach/callbacks.py +32 -0
- tensorbored/_vendor/bleach/html5lib_shim.py +757 -0
- tensorbored/_vendor/bleach/linkifier.py +633 -0
- tensorbored/_vendor/bleach/parse_shim.py +1 -0
- tensorbored/_vendor/bleach/sanitizer.py +638 -0
- tensorbored/_vendor/bleach/six_shim.py +19 -0
- tensorbored/_vendor/webencodings/__init__.py +342 -0
- tensorbored/_vendor/webencodings/labels.py +231 -0
- tensorbored/_vendor/webencodings/mklabels.py +59 -0
- tensorbored/_vendor/webencodings/x_user_defined.py +325 -0
- tensorbored/assets.py +36 -0
- tensorbored/auth.py +102 -0
- tensorbored/backend/__init__.py +0 -0
- tensorbored/backend/application.py +604 -0
- tensorbored/backend/auth_context_middleware.py +38 -0
- tensorbored/backend/client_feature_flags.py +113 -0
- tensorbored/backend/empty_path_redirect.py +46 -0
- tensorbored/backend/event_processing/__init__.py +0 -0
- tensorbored/backend/event_processing/data_ingester.py +276 -0
- tensorbored/backend/event_processing/data_provider.py +535 -0
- tensorbored/backend/event_processing/directory_loader.py +142 -0
- tensorbored/backend/event_processing/directory_watcher.py +272 -0
- tensorbored/backend/event_processing/event_accumulator.py +950 -0
- tensorbored/backend/event_processing/event_file_inspector.py +463 -0
- tensorbored/backend/event_processing/event_file_loader.py +292 -0
- tensorbored/backend/event_processing/event_multiplexer.py +521 -0
- tensorbored/backend/event_processing/event_util.py +68 -0
- tensorbored/backend/event_processing/io_wrapper.py +223 -0
- tensorbored/backend/event_processing/plugin_asset_util.py +104 -0
- tensorbored/backend/event_processing/plugin_event_accumulator.py +721 -0
- tensorbored/backend/event_processing/plugin_event_multiplexer.py +522 -0
- tensorbored/backend/event_processing/reservoir.py +266 -0
- tensorbored/backend/event_processing/tag_types.py +29 -0
- tensorbored/backend/experiment_id.py +71 -0
- tensorbored/backend/experimental_plugin.py +51 -0
- tensorbored/backend/http_util.py +263 -0
- tensorbored/backend/json_util.py +70 -0
- tensorbored/backend/path_prefix.py +67 -0
- tensorbored/backend/process_graph.py +74 -0
- tensorbored/backend/security_validator.py +202 -0
- tensorbored/compat/__init__.py +69 -0
- tensorbored/compat/proto/__init__.py +0 -0
- tensorbored/compat/proto/allocation_description_pb2.py +35 -0
- tensorbored/compat/proto/api_def_pb2.py +82 -0
- tensorbored/compat/proto/attr_value_pb2.py +80 -0
- tensorbored/compat/proto/cluster_pb2.py +58 -0
- tensorbored/compat/proto/config_pb2.py +271 -0
- tensorbored/compat/proto/coordination_config_pb2.py +45 -0
- tensorbored/compat/proto/cost_graph_pb2.py +87 -0
- tensorbored/compat/proto/cpp_shape_inference_pb2.py +70 -0
- tensorbored/compat/proto/debug_pb2.py +65 -0
- tensorbored/compat/proto/event_pb2.py +149 -0
- tensorbored/compat/proto/full_type_pb2.py +74 -0
- tensorbored/compat/proto/function_pb2.py +157 -0
- tensorbored/compat/proto/graph_debug_info_pb2.py +111 -0
- tensorbored/compat/proto/graph_pb2.py +41 -0
- tensorbored/compat/proto/histogram_pb2.py +39 -0
- tensorbored/compat/proto/meta_graph_pb2.py +254 -0
- tensorbored/compat/proto/node_def_pb2.py +61 -0
- tensorbored/compat/proto/op_def_pb2.py +81 -0
- tensorbored/compat/proto/resource_handle_pb2.py +48 -0
- tensorbored/compat/proto/rewriter_config_pb2.py +93 -0
- tensorbored/compat/proto/rpc_options_pb2.py +35 -0
- tensorbored/compat/proto/saved_object_graph_pb2.py +193 -0
- tensorbored/compat/proto/saver_pb2.py +38 -0
- tensorbored/compat/proto/step_stats_pb2.py +116 -0
- tensorbored/compat/proto/struct_pb2.py +144 -0
- tensorbored/compat/proto/summary_pb2.py +111 -0
- tensorbored/compat/proto/tensor_description_pb2.py +38 -0
- tensorbored/compat/proto/tensor_pb2.py +68 -0
- tensorbored/compat/proto/tensor_shape_pb2.py +46 -0
- tensorbored/compat/proto/tfprof_log_pb2.py +307 -0
- tensorbored/compat/proto/trackable_object_graph_pb2.py +90 -0
- tensorbored/compat/proto/types_pb2.py +105 -0
- tensorbored/compat/proto/variable_pb2.py +62 -0
- tensorbored/compat/proto/verifier_config_pb2.py +38 -0
- tensorbored/compat/proto/versions_pb2.py +35 -0
- tensorbored/compat/tensorflow_stub/__init__.py +38 -0
- tensorbored/compat/tensorflow_stub/app.py +124 -0
- tensorbored/compat/tensorflow_stub/compat/__init__.py +131 -0
- tensorbored/compat/tensorflow_stub/compat/v1/__init__.py +20 -0
- tensorbored/compat/tensorflow_stub/dtypes.py +692 -0
- tensorbored/compat/tensorflow_stub/error_codes.py +169 -0
- tensorbored/compat/tensorflow_stub/errors.py +507 -0
- tensorbored/compat/tensorflow_stub/flags.py +124 -0
- tensorbored/compat/tensorflow_stub/io/__init__.py +17 -0
- tensorbored/compat/tensorflow_stub/io/gfile.py +1011 -0
- tensorbored/compat/tensorflow_stub/pywrap_tensorflow.py +285 -0
- tensorbored/compat/tensorflow_stub/tensor_shape.py +1035 -0
- tensorbored/context.py +129 -0
- tensorbored/data/__init__.py +0 -0
- tensorbored/data/grpc_provider.py +365 -0
- tensorbored/data/ingester.py +46 -0
- tensorbored/data/proto/__init__.py +0 -0
- tensorbored/data/proto/data_provider_pb2.py +517 -0
- tensorbored/data/proto/data_provider_pb2_grpc.py +374 -0
- tensorbored/data/provider.py +1365 -0
- tensorbored/data/server_ingester.py +301 -0
- tensorbored/data_compat.py +159 -0
- tensorbored/dataclass_compat.py +224 -0
- tensorbored/default.py +124 -0
- tensorbored/errors.py +130 -0
- tensorbored/lazy.py +99 -0
- tensorbored/main.py +48 -0
- tensorbored/main_lib.py +62 -0
- tensorbored/manager.py +487 -0
- tensorbored/notebook.py +441 -0
- tensorbored/plugin_util.py +266 -0
- tensorbored/plugins/__init__.py +0 -0
- tensorbored/plugins/audio/__init__.py +0 -0
- tensorbored/plugins/audio/audio_plugin.py +229 -0
- tensorbored/plugins/audio/metadata.py +69 -0
- tensorbored/plugins/audio/plugin_data_pb2.py +37 -0
- tensorbored/plugins/audio/summary.py +230 -0
- tensorbored/plugins/audio/summary_v2.py +124 -0
- tensorbored/plugins/base_plugin.py +367 -0
- tensorbored/plugins/core/__init__.py +0 -0
- tensorbored/plugins/core/core_plugin.py +981 -0
- tensorbored/plugins/custom_scalar/__init__.py +0 -0
- tensorbored/plugins/custom_scalar/custom_scalars_plugin.py +320 -0
- tensorbored/plugins/custom_scalar/layout_pb2.py +85 -0
- tensorbored/plugins/custom_scalar/metadata.py +35 -0
- tensorbored/plugins/custom_scalar/summary.py +79 -0
- tensorbored/plugins/debugger_v2/__init__.py +0 -0
- tensorbored/plugins/debugger_v2/debug_data_multiplexer.py +631 -0
- tensorbored/plugins/debugger_v2/debug_data_provider.py +634 -0
- tensorbored/plugins/debugger_v2/debugger_v2_plugin.py +504 -0
- tensorbored/plugins/distribution/__init__.py +0 -0
- tensorbored/plugins/distribution/compressor.py +158 -0
- tensorbored/plugins/distribution/distributions_plugin.py +116 -0
- tensorbored/plugins/distribution/metadata.py +19 -0
- tensorbored/plugins/graph/__init__.py +0 -0
- tensorbored/plugins/graph/graph_util.py +129 -0
- tensorbored/plugins/graph/graphs_plugin.py +336 -0
- tensorbored/plugins/graph/keras_util.py +328 -0
- tensorbored/plugins/graph/metadata.py +42 -0
- tensorbored/plugins/histogram/__init__.py +0 -0
- tensorbored/plugins/histogram/histograms_plugin.py +144 -0
- tensorbored/plugins/histogram/metadata.py +63 -0
- tensorbored/plugins/histogram/plugin_data_pb2.py +34 -0
- tensorbored/plugins/histogram/summary.py +234 -0
- tensorbored/plugins/histogram/summary_v2.py +292 -0
- tensorbored/plugins/hparams/__init__.py +14 -0
- tensorbored/plugins/hparams/_keras.py +93 -0
- tensorbored/plugins/hparams/api.py +130 -0
- tensorbored/plugins/hparams/api_pb2.py +208 -0
- tensorbored/plugins/hparams/backend_context.py +606 -0
- tensorbored/plugins/hparams/download_data.py +158 -0
- tensorbored/plugins/hparams/error.py +26 -0
- tensorbored/plugins/hparams/get_experiment.py +71 -0
- tensorbored/plugins/hparams/hparams_plugin.py +206 -0
- tensorbored/plugins/hparams/hparams_util_pb2.py +69 -0
- tensorbored/plugins/hparams/json_format_compat.py +38 -0
- tensorbored/plugins/hparams/list_metric_evals.py +57 -0
- tensorbored/plugins/hparams/list_session_groups.py +1040 -0
- tensorbored/plugins/hparams/metadata.py +125 -0
- tensorbored/plugins/hparams/metrics.py +41 -0
- tensorbored/plugins/hparams/plugin_data_pb2.py +69 -0
- tensorbored/plugins/hparams/summary.py +205 -0
- tensorbored/plugins/hparams/summary_v2.py +597 -0
- tensorbored/plugins/image/__init__.py +0 -0
- tensorbored/plugins/image/images_plugin.py +232 -0
- tensorbored/plugins/image/metadata.py +65 -0
- tensorbored/plugins/image/plugin_data_pb2.py +34 -0
- tensorbored/plugins/image/summary.py +159 -0
- tensorbored/plugins/image/summary_v2.py +130 -0
- tensorbored/plugins/mesh/__init__.py +14 -0
- tensorbored/plugins/mesh/mesh_plugin.py +292 -0
- tensorbored/plugins/mesh/metadata.py +152 -0
- tensorbored/plugins/mesh/plugin_data_pb2.py +37 -0
- tensorbored/plugins/mesh/summary.py +251 -0
- tensorbored/plugins/mesh/summary_v2.py +214 -0
- tensorbored/plugins/metrics/__init__.py +0 -0
- tensorbored/plugins/metrics/metadata.py +17 -0
- tensorbored/plugins/metrics/metrics_plugin.py +623 -0
- tensorbored/plugins/pr_curve/__init__.py +0 -0
- tensorbored/plugins/pr_curve/metadata.py +75 -0
- tensorbored/plugins/pr_curve/plugin_data_pb2.py +34 -0
- tensorbored/plugins/pr_curve/pr_curves_plugin.py +241 -0
- tensorbored/plugins/pr_curve/summary.py +574 -0
- tensorbored/plugins/profile_redirect/__init__.py +0 -0
- tensorbored/plugins/profile_redirect/profile_redirect_plugin.py +49 -0
- tensorbored/plugins/projector/__init__.py +67 -0
- tensorbored/plugins/projector/metadata.py +26 -0
- tensorbored/plugins/projector/projector_config_pb2.py +54 -0
- tensorbored/plugins/projector/projector_plugin.py +795 -0
- tensorbored/plugins/projector/tf_projector_plugin/index.js +32 -0
- tensorbored/plugins/projector/tf_projector_plugin/projector_binary.html +524 -0
- tensorbored/plugins/projector/tf_projector_plugin/projector_binary.js +15536 -0
- tensorbored/plugins/scalar/__init__.py +0 -0
- tensorbored/plugins/scalar/metadata.py +60 -0
- tensorbored/plugins/scalar/plugin_data_pb2.py +34 -0
- tensorbored/plugins/scalar/scalars_plugin.py +181 -0
- tensorbored/plugins/scalar/summary.py +109 -0
- tensorbored/plugins/scalar/summary_v2.py +124 -0
- tensorbored/plugins/text/__init__.py +0 -0
- tensorbored/plugins/text/metadata.py +62 -0
- tensorbored/plugins/text/plugin_data_pb2.py +34 -0
- tensorbored/plugins/text/summary.py +114 -0
- tensorbored/plugins/text/summary_v2.py +124 -0
- tensorbored/plugins/text/text_plugin.py +288 -0
- tensorbored/plugins/wit_redirect/__init__.py +0 -0
- tensorbored/plugins/wit_redirect/wit_redirect_plugin.py +49 -0
- tensorbored/program.py +910 -0
- tensorbored/summary/__init__.py +35 -0
- tensorbored/summary/_output.py +124 -0
- tensorbored/summary/_tf/__init__.py +14 -0
- tensorbored/summary/_tf/summary/__init__.py +178 -0
- tensorbored/summary/_writer.py +105 -0
- tensorbored/summary/v1.py +51 -0
- tensorbored/summary/v2.py +25 -0
- tensorbored/summary/writer/__init__.py +13 -0
- tensorbored/summary/writer/event_file_writer.py +291 -0
- tensorbored/summary/writer/record_writer.py +50 -0
- tensorbored/util/__init__.py +0 -0
- tensorbored/util/encoder.py +116 -0
- tensorbored/util/grpc_util.py +311 -0
- tensorbored/util/img_mime_type_detector.py +40 -0
- tensorbored/util/io_util.py +20 -0
- tensorbored/util/lazy_tensor_creator.py +110 -0
- tensorbored/util/op_evaluator.py +104 -0
- tensorbored/util/platform_util.py +20 -0
- tensorbored/util/tb_logging.py +24 -0
- tensorbored/util/tensor_util.py +617 -0
- tensorbored/util/timing.py +122 -0
- tensorbored/version.py +21 -0
- tensorbored/webfiles.zip +0 -0
- tensorbored-2.21.0rc1769983804.dist-info/METADATA +49 -0
- tensorbored-2.21.0rc1769983804.dist-info/RECORD +271 -0
- tensorbored-2.21.0rc1769983804.dist-info/WHEEL +5 -0
- tensorbored-2.21.0rc1769983804.dist-info/entry_points.txt +6 -0
- tensorbored-2.21.0rc1769983804.dist-info/licenses/LICENSE +739 -0
- tensorbored-2.21.0rc1769983804.dist-info/top_level.txt +1 -0
tensorbored/_vendor/bleach/sanitizer.py
@@ -0,0 +1,638 @@
+from itertools import chain
+import re
+import warnings
+
+from xml.sax.saxutils import unescape
+
+from tensorbored._vendor.bleach import html5lib_shim
+from tensorbored._vendor.bleach import parse_shim
+
+
+#: Set of allowed tags
+ALLOWED_TAGS = frozenset(
+    (
+        "a",
+        "abbr",
+        "acronym",
+        "b",
+        "blockquote",
+        "code",
+        "em",
+        "i",
+        "li",
+        "ol",
+        "strong",
+        "ul",
+    )
+)
+
+
+#: Map of allowed attributes by tag
+ALLOWED_ATTRIBUTES = {
+    "a": ["href", "title"],
+    "abbr": ["title"],
+    "acronym": ["title"],
+}
+
+#: List of allowed protocols
+ALLOWED_PROTOCOLS = frozenset(("http", "https", "mailto"))
+
+#: Invisible characters--0 to and including 31 except 9 (tab), 10 (lf), and 13 (cr)
+INVISIBLE_CHARACTERS = "".join(
+    [chr(c) for c in chain(range(0, 9), range(11, 13), range(14, 32))]
+)
+
+#: Regexp for characters that are invisible
+INVISIBLE_CHARACTERS_RE = re.compile("[" + INVISIBLE_CHARACTERS + "]", re.UNICODE)
+
+#: String to replace invisible characters with. This can be a character, a
+#: string, or even a function that takes a Python re matchobj
+INVISIBLE_REPLACEMENT_CHAR = "?"
+
+
+class NoCssSanitizerWarning(UserWarning):
+    pass
+
+
+class Cleaner:
+    """Cleaner for cleaning HTML fragments of malicious content
+
+    This cleaner is a security-focused function whose sole purpose is to remove
+    malicious content from a string such that it can be displayed as content in
+    a web page.
+
+    To use::
+
+        from bleach.sanitizer import Cleaner
+
+        cleaner = Cleaner()
+
+        for text in all_the_yucky_things:
+            sanitized = cleaner.clean(text)
+
+    .. Note::
+
+       This cleaner is not designed to use to transform content to be used in
+       non-web-page contexts.
+
+    .. Warning::
+
+       This cleaner is not thread-safe--the html parser has internal state.
+       Create a separate cleaner per thread!
+
+
+    """
+
+    def __init__(
+        self,
+        tags=ALLOWED_TAGS,
+        attributes=ALLOWED_ATTRIBUTES,
+        protocols=ALLOWED_PROTOCOLS,
+        strip=False,
+        strip_comments=True,
+        filters=None,
+        css_sanitizer=None,
+    ):
+        """Initializes a Cleaner
+
+        :arg set tags: set of allowed tags; defaults to
+            ``bleach.sanitizer.ALLOWED_TAGS``
+
+        :arg dict attributes: allowed attributes; can be a callable, list or dict;
+            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
+
+        :arg list protocols: allowed list of protocols for links; defaults
+            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
+
+        :arg bool strip: whether or not to strip disallowed elements
+
+        :arg bool strip_comments: whether or not to strip HTML comments
+
+        :arg list filters: list of html5lib Filter classes to pass streamed content through
+
+            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters
+
+            .. Warning::
+
+               Using filters changes the output of ``bleach.Cleaner.clean``.
+               Make sure the way the filters change the output are secure.
+
+        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+            sanitizing style attribute values and style text; defaults to None
+
+        """
+        self.tags = tags
+        self.attributes = attributes
+        self.protocols = protocols
+        self.strip = strip
+        self.strip_comments = strip_comments
+        self.filters = filters or []
+        self.css_sanitizer = css_sanitizer
+
+        self.parser = html5lib_shim.BleachHTMLParser(
+            tags=self.tags,
+            strip=self.strip,
+            consume_entities=False,
+            namespaceHTMLElements=False,
+        )
+        self.walker = html5lib_shim.getTreeWalker("etree")
+        self.serializer = html5lib_shim.BleachHTMLSerializer(
+            quote_attr_values="always",
+            omit_optional_tags=False,
+            escape_lt_in_attrs=True,
+            # We want to leave entities as they are without escaping or
+            # resolving or expanding
+            resolve_entities=False,
+            # Bleach has its own sanitizer, so don't use the html5lib one
+            sanitize=False,
+            # clean preserves attr order
+            alphabetical_attributes=False,
+        )
+
+        if css_sanitizer is None:
+            # FIXME(willkg): this doesn't handle when attributes or an
+            # attributes value is a callable
+            attributes_values = []
+            if isinstance(attributes, list):
+                attributes_values = attributes
+
+            elif isinstance(attributes, dict):
+                attributes_values = []
+                for values in attributes.values():
+                    if isinstance(values, (list, tuple)):
+                        attributes_values.extend(values)
+
+            if "style" in attributes_values:
+                warnings.warn(
+                    "'style' attribute specified, but css_sanitizer not set.",
+                    category=NoCssSanitizerWarning,
+                )
+
+    def clean(self, text):
+        """Cleans text and returns sanitized result as unicode
+
+        :arg str text: text to be cleaned
+
+        :returns: sanitized text as unicode
+
+        :raises TypeError: if ``text`` is not a text type
+
+        """
+        if not isinstance(text, str):
+            message = (
+                f"argument cannot be of {text.__class__.__name__!r} type, "
+                + "must be of text type"
+            )
+            raise TypeError(message)
+
+        if not text:
+            return ""
+
+        dom = self.parser.parseFragment(text)
+        filtered = BleachSanitizerFilter(
+            source=self.walker(dom),
+            allowed_tags=self.tags,
+            attributes=self.attributes,
+            strip_disallowed_tags=self.strip,
+            strip_html_comments=self.strip_comments,
+            css_sanitizer=self.css_sanitizer,
+            allowed_protocols=self.protocols,
+        )
+
+        # Apply any filters after the BleachSanitizerFilter
+        for filter_class in self.filters:
+            filtered = filter_class(source=filtered)
+
+        return self.serializer.render(filtered)
+
+
+def attribute_filter_factory(attributes):
+    """Generates attribute filter function for the given attributes value
+
+    The attributes value can take one of several shapes. This returns a filter
+    function appropriate to the attributes value. One nice thing about this is
+    that there's less if/then shenanigans in the ``allow_token`` method.
+
+    """
+    if callable(attributes):
+        return attributes
+
+    if isinstance(attributes, dict):
+
+        def _attr_filter(tag, attr, value):
+            if tag in attributes:
+                attr_val = attributes[tag]
+                if callable(attr_val):
+                    return attr_val(tag, attr, value)
+
+                if attr in attr_val:
+                    return True
+
+            if "*" in attributes:
+                attr_val = attributes["*"]
+                if callable(attr_val):
+                    return attr_val(tag, attr, value)
+
+                return attr in attr_val
+
+            return False
+
+        return _attr_filter
+
+    if isinstance(attributes, list):
+
+        def _attr_filter(tag, attr, value):
+            return attr in attributes
+
+        return _attr_filter
+
+    raise ValueError("attributes needs to be a callable, a list or a dict")
+
+
+class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
+    """html5lib Filter that sanitizes text
+
+    This filter can be used anywhere html5lib filters can be used.
+
+    """
+
+    def __init__(
+        self,
+        source,
+        allowed_tags=ALLOWED_TAGS,
+        attributes=ALLOWED_ATTRIBUTES,
+        allowed_protocols=ALLOWED_PROTOCOLS,
+        attr_val_is_uri=html5lib_shim.attr_val_is_uri,
+        svg_attr_val_allows_ref=html5lib_shim.svg_attr_val_allows_ref,
+        svg_allow_local_href=html5lib_shim.svg_allow_local_href,
+        strip_disallowed_tags=False,
+        strip_html_comments=True,
+        css_sanitizer=None,
+    ):
+        """Creates a BleachSanitizerFilter instance
+
+        :arg source: html5lib TreeWalker stream as an html5lib TreeWalker
+
+        :arg set allowed_tags: set of allowed tags; defaults to
+            ``bleach.sanitizer.ALLOWED_TAGS``
+
+        :arg dict attributes: allowed attributes; can be a callable, list or dict;
+            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
+
+        :arg list allowed_protocols: allowed list of protocols for links; defaults
+            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
+
+        :arg attr_val_is_uri: set of attributes that have URI values
+
+        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
+            references
+
+        :arg svg_allow_local_href: set of SVG elements that can have local
+            hrefs
+
+        :arg bool strip_disallowed_tags: whether or not to strip disallowed
+            tags
+
+        :arg bool strip_html_comments: whether or not to strip HTML comments
+
+        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
+            sanitizing style attribute values and style text; defaults to None
+
+        """
+        # NOTE(willkg): This is the superclass of
+        # html5lib.filters.sanitizer.Filter. We call this directly skipping the
+        # __init__ for html5lib.filters.sanitizer.Filter because that does
+        # things we don't need to do and kicks up the deprecation warning for
+        # using Sanitizer.
+        html5lib_shim.Filter.__init__(self, source)
+
+        self.allowed_tags = frozenset(allowed_tags)
+        self.allowed_protocols = frozenset(allowed_protocols)
+
+        self.attr_filter = attribute_filter_factory(attributes)
+        self.strip_disallowed_tags = strip_disallowed_tags
+        self.strip_html_comments = strip_html_comments
+
+        self.attr_val_is_uri = attr_val_is_uri
+        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
+        self.css_sanitizer = css_sanitizer
+        self.svg_allow_local_href = svg_allow_local_href
+
+    def sanitize_stream(self, token_iterator):
+        for token in token_iterator:
+            ret = self.sanitize_token(token)
+
+            if not ret:
+                continue
+
+            if isinstance(ret, list):
+                yield from ret
+            else:
+                yield ret
+
+    def merge_characters(self, token_iterator):
+        """Merge consecutive Characters tokens in a stream"""
+        characters_buffer = []
+
+        for token in token_iterator:
+            if characters_buffer:
+                if token["type"] == "Characters":
+                    characters_buffer.append(token)
+                    continue
+                else:
+                    # Merge all the characters tokens together into one and then
+                    # operate on it.
+                    new_token = {
+                        "data": "".join(
+                            [char_token["data"] for char_token in characters_buffer]
+                        ),
+                        "type": "Characters",
+                    }
+                    characters_buffer = []
+                    yield new_token
+
+            elif token["type"] == "Characters":
+                characters_buffer.append(token)
+                continue
+
+            yield token
+
+        new_token = {
+            "data": "".join([char_token["data"] for char_token in characters_buffer]),
+            "type": "Characters",
+        }
+        yield new_token
+
+    def __iter__(self):
+        return self.merge_characters(
+            self.sanitize_stream(html5lib_shim.Filter.__iter__(self))
+        )
+
+    def sanitize_token(self, token):
+        """Sanitize a token either by HTML-encoding or dropping.
+
+        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
+        ['attribute', 'pairs'], 'tag': callable}.
+
+        Here callable is a function with two arguments of attribute name and
+        value. It should return true of false.
+
+        Also gives the option to strip tags instead of encoding.
+
+        :arg dict token: token to sanitize
+
+        :returns: token or list of tokens
+
+        """
+        token_type = token["type"]
+        if token_type in ["StartTag", "EndTag", "EmptyTag"]:
+            if token["name"] in self.allowed_tags:
+                return self.allow_token(token)
+
+            elif self.strip_disallowed_tags:
+                return None
+
+            else:
+                return self.disallowed_token(token)
+
+        elif token_type == "Comment":
+            if not self.strip_html_comments:
+                # call lxml.sax.saxutils to escape &, <, and > in addition to " and '
+                token["data"] = html5lib_shim.escape(
+                    token["data"], entities={'"': "&quot;", "'": "&#x27;"}
+                )
+                return token
+            else:
+                return None
+
+        elif token_type == "Characters":
+            return self.sanitize_characters(token)
+
+        else:
+            return token
+
+    def sanitize_characters(self, token):
+        """Handles Characters tokens
+
+        Our overridden tokenizer doesn't do anything with entities. However,
+        that means that the serializer will convert all ``&`` in Characters
+        tokens to ``&amp;``.
+
+        Since we don't want that, we extract entities here and convert them to
+        Entity tokens so the serializer will let them be.
+
+        :arg token: the Characters token to work on
+
+        :returns: a list of tokens
+
+        """
+        data = token.get("data", "")
+
+        if not data:
+            return token
+
+        data = INVISIBLE_CHARACTERS_RE.sub(INVISIBLE_REPLACEMENT_CHAR, data)
+        token["data"] = data
+
+        # If there isn't a & in the data, we can return now
+        if "&" not in data:
+            return token
+
+        new_tokens = []
+
+        # For each possible entity that starts with a "&", we try to extract an
+        # actual entity and re-tokenize accordingly
+        for part in html5lib_shim.next_possible_entity(data):
+            if not part:
+                continue
+
+            if part.startswith("&"):
+                entity = html5lib_shim.match_entity(part)
+                if entity is not None:
+                    if entity == "amp":
+                        # LinkifyFilter can't match urls across token boundaries
+                        # which is problematic with &amp; since that shows up in
+                        # querystrings all the time. This special-cases &amp;
+                        # and converts it to a & and sticks it in as a
+                        # Characters token. It'll get merged with surrounding
+                        # tokens in the BleachSanitizerfilter.__iter__ and
+                        # escaped in the serializer.
+                        new_tokens.append({"type": "Characters", "data": "&"})
+                    else:
+                        new_tokens.append({"type": "Entity", "name": entity})
+
+                    # Length of the entity plus 2--one for & at the beginning
+                    # and one for ; at the end
+                    remainder = part[len(entity) + 2 :]
+                    if remainder:
+                        new_tokens.append({"type": "Characters", "data": remainder})
+                    continue
+
+            new_tokens.append({"type": "Characters", "data": part})
+
+        return new_tokens
+
+    def sanitize_uri_value(self, value, allowed_protocols):
+        """Checks a uri value to see if it's allowed
+
+        :arg value: the uri value to sanitize
+        :arg allowed_protocols: list of allowed protocols
+
+        :returns: allowed value or None
+
+        """
+        # NOTE(willkg): This transforms the value into a normalized one that's
+        # easier to match and verify, but shouldn't get returned since it's
+        # vastly different than the original value.
+
+        # Convert all character entities in the value
+        normalized_uri = html5lib_shim.convert_entities(value)
+
+        # Nix backtick, space characters, and control characters
+        normalized_uri = re.sub(r"[`\000-\040\177-\240\s]+", "", normalized_uri)
+
+        # Remove REPLACEMENT characters
+        normalized_uri = normalized_uri.replace("\ufffd", "")
+
+        # Lowercase it--this breaks the value, but makes it easier to match
+        # against
+        normalized_uri = normalized_uri.lower()
+
+        try:
+            # Drop attributes with uri values that have protocols that aren't
+            # allowed
+            parsed = parse_shim.urlparse(normalized_uri)
+        except ValueError:
+            # URI is impossible to parse, therefore it's not allowed
+            return None
+
+        if parsed.scheme:
+            # If urlparse found a scheme, check that
+            if parsed.scheme in allowed_protocols:
+                return value
+
+        else:
+            # Allow uris that are just an anchor
+            if normalized_uri.startswith("#"):
+                return value
+
+            # Handle protocols that urlparse doesn't recognize like "myprotocol"
+            if (
+                ":" in normalized_uri
+                and normalized_uri.split(":")[0] in allowed_protocols
+            ):
+                return value
+
+            # If there's no protocol/scheme specified, then assume it's "http" or
+            # "https" and see if that's allowed
+            if "http" in allowed_protocols or "https" in allowed_protocols:
+                return value
+
+        return None
+
+    def allow_token(self, token):
+        """Handles the case where we're allowing the tag"""
+        if "data" in token:
+            # Loop through all the attributes and drop the ones that are not
+            # allowed, are unsafe or break other rules. Additionally, fix
+            # attribute values that need fixing.
+            #
+            # At the end of this loop, we have the final set of attributes
+            # we're keeping.
+            attrs = {}
+            for namespaced_name, val in token["data"].items():
+                namespace, name = namespaced_name
+
+                # Drop attributes that are not explicitly allowed
+                #
+                # NOTE(willkg): We pass in the attribute name--not a namespaced
+                # name.
+                if not self.attr_filter(token["name"], name, val):
+                    continue
+
+                # Drop attributes with uri values that use a disallowed protocol
+                # Sanitize attributes with uri values
+                if namespaced_name in self.attr_val_is_uri:
+                    new_value = self.sanitize_uri_value(val, self.allowed_protocols)
+                    if new_value is None:
+                        continue
+                    val = new_value
+
+                # Drop values in svg attrs with non-local IRIs
+                if namespaced_name in self.svg_attr_val_allows_ref:
+                    new_val = re.sub(r"url\s*\(\s*[^#\s][^)]+?\)", " ", unescape(val))
+                    new_val = new_val.strip()
+                    if not new_val:
+                        continue
+
+                    else:
+                        # Replace the val with the unescaped version because
+                        # it's a iri
+                        val = new_val
+
+                # Drop href and xlink:href attr for svg elements with non-local IRIs
+                if (None, token["name"]) in self.svg_allow_local_href:
+                    if namespaced_name in [
+                        (None, "href"),
+                        (html5lib_shim.namespaces["xlink"], "href"),
+                    ]:
+                        if re.search(r"^\s*[^#\s]", val):
+                            continue
+
+                # If it's a style attribute, sanitize it
+                if namespaced_name == (None, "style"):
+                    if self.css_sanitizer:
+                        val = self.css_sanitizer.sanitize_css(val)
+                    else:
+                        # FIXME(willkg): if style is allowed, but no
+                        # css_sanitizer was set up, then this is probably a
+                        # mistake and we should raise an error here
+                        #
+                        # For now, we're going to set the value to "" because
+                        # there was no sanitizer set
+                        val = ""
+
+                # At this point, we want to keep the attribute, so add it in
+                attrs[namespaced_name] = val
+
+            token["data"] = attrs
+
+        return token
+
+    def disallowed_token(self, token):
+        token_type = token["type"]
+        if token_type == "EndTag":
+            token["data"] = f"</{token['name']}>"
+
+        elif token["data"]:
+            assert token_type in ("StartTag", "EmptyTag")
+            attrs = []
+            for (ns, name), v in token["data"].items():
+                # If we end up with a namespace, but no name, switch them so we
+                # have a valid name to use.
+                if ns and not name:
+                    ns, name = name, ns
+
+                # Figure out namespaced name if the namespace is appropriate
+                # and exists; if the ns isn't in prefixes, then drop it.
+                if ns is None or ns not in html5lib_shim.prefixes:
+                    namespaced_name = name
+                else:
+                    namespaced_name = f"{html5lib_shim.prefixes[ns]}:{name}"
+
+                # NOTE(willkg): HTMLSerializer escapes attribute values
+                # already, so if we do it here (like HTMLSerializer does),
+                # then we end up double-escaping.
+                attrs.append(f' {namespaced_name}="{v}"')
+            token["data"] = f"<{token['name']}{''.join(attrs)}>"
+
+        else:
+            token["data"] = f"<{token['name']}>"
+
+        if token.get("selfClosing"):
+            token["data"] = f"{token['data'][:-1]}/>"
+
+        token["type"] = "Characters"
+
+        del token["name"]
+        return token
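The Cleaner docstring above already sketches the intended call pattern. For orientation, here is a minimal usage sketch against this vendored copy; the import path follows the vendored layout used by the module's own imports, and the input string and expected output shape are illustrative, not taken from the package:

    from tensorbored._vendor.bleach.sanitizer import ALLOWED_TAGS, Cleaner

    # Allow the default tags plus <span>; only href/title survive on <a>.
    cleaner = Cleaner(
        tags=ALLOWED_TAGS | {"span"},
        attributes={"a": ["href", "title"]},
        strip=True,           # drop disallowed tags instead of escaping them
        strip_comments=True,  # remove HTML comments entirely
    )

    dirty = '<a href="javascript:alert(1)" onclick="steal()" title="hi">click</a>'
    print(cleaner.clean(dirty))
    # onclick is not in the allowed attributes and "javascript" is not in
    # ALLOWED_PROTOCOLS, so attr_filter and sanitize_uri_value drop both;
    # expected shape: <a title="hi">click</a>

Because css_sanitizer is left as None and "style" is not among the allowed attribute values, the NoCssSanitizerWarning defined above is not raised for this configuration.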
tensorbored/_vendor/bleach/six_shim.py
@@ -0,0 +1,19 @@
+"""
+Replacement module for what html5lib uses six for.
+"""
+
+import http.client
+import operator
+import urllib
+
+
+PY3 = True
+binary_type = bytes
+string_types = (str,)
+text_type = str
+unichr = chr
+viewkeys = operator.methodcaller("keys")
+
+http_client = http.client
+urllib = urllib
+urllib_parse = urllib.parse