tensorbored 2.21.0rc1769983804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271) hide show
  1. tensorbored/__init__.py +112 -0
  2. tensorbored/_vendor/__init__.py +0 -0
  3. tensorbored/_vendor/bleach/__init__.py +125 -0
  4. tensorbored/_vendor/bleach/_vendor/__init__.py +0 -0
  5. tensorbored/_vendor/bleach/_vendor/html5lib/__init__.py +35 -0
  6. tensorbored/_vendor/bleach/_vendor/html5lib/_ihatexml.py +289 -0
  7. tensorbored/_vendor/bleach/_vendor/html5lib/_inputstream.py +918 -0
  8. tensorbored/_vendor/bleach/_vendor/html5lib/_tokenizer.py +1735 -0
  9. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/__init__.py +5 -0
  10. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/_base.py +40 -0
  11. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/py.py +67 -0
  12. tensorbored/_vendor/bleach/_vendor/html5lib/_utils.py +159 -0
  13. tensorbored/_vendor/bleach/_vendor/html5lib/constants.py +2946 -0
  14. tensorbored/_vendor/bleach/_vendor/html5lib/filters/__init__.py +0 -0
  15. tensorbored/_vendor/bleach/_vendor/html5lib/filters/alphabeticalattributes.py +29 -0
  16. tensorbored/_vendor/bleach/_vendor/html5lib/filters/base.py +12 -0
  17. tensorbored/_vendor/bleach/_vendor/html5lib/filters/inject_meta_charset.py +73 -0
  18. tensorbored/_vendor/bleach/_vendor/html5lib/filters/lint.py +93 -0
  19. tensorbored/_vendor/bleach/_vendor/html5lib/filters/optionaltags.py +207 -0
  20. tensorbored/_vendor/bleach/_vendor/html5lib/filters/sanitizer.py +916 -0
  21. tensorbored/_vendor/bleach/_vendor/html5lib/filters/whitespace.py +38 -0
  22. tensorbored/_vendor/bleach/_vendor/html5lib/html5parser.py +2795 -0
  23. tensorbored/_vendor/bleach/_vendor/html5lib/serializer.py +409 -0
  24. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/__init__.py +30 -0
  25. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/genshi.py +54 -0
  26. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/sax.py +50 -0
  27. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/__init__.py +88 -0
  28. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/base.py +417 -0
  29. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/dom.py +239 -0
  30. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree.py +343 -0
  31. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree_lxml.py +392 -0
  32. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/__init__.py +154 -0
  33. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/base.py +252 -0
  34. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/dom.py +43 -0
  35. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree.py +131 -0
  36. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree_lxml.py +215 -0
  37. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/genshi.py +69 -0
  38. tensorbored/_vendor/bleach/_vendor/parse.py +1078 -0
  39. tensorbored/_vendor/bleach/callbacks.py +32 -0
  40. tensorbored/_vendor/bleach/html5lib_shim.py +757 -0
  41. tensorbored/_vendor/bleach/linkifier.py +633 -0
  42. tensorbored/_vendor/bleach/parse_shim.py +1 -0
  43. tensorbored/_vendor/bleach/sanitizer.py +638 -0
  44. tensorbored/_vendor/bleach/six_shim.py +19 -0
  45. tensorbored/_vendor/webencodings/__init__.py +342 -0
  46. tensorbored/_vendor/webencodings/labels.py +231 -0
  47. tensorbored/_vendor/webencodings/mklabels.py +59 -0
  48. tensorbored/_vendor/webencodings/x_user_defined.py +325 -0
  49. tensorbored/assets.py +36 -0
  50. tensorbored/auth.py +102 -0
  51. tensorbored/backend/__init__.py +0 -0
  52. tensorbored/backend/application.py +604 -0
  53. tensorbored/backend/auth_context_middleware.py +38 -0
  54. tensorbored/backend/client_feature_flags.py +113 -0
  55. tensorbored/backend/empty_path_redirect.py +46 -0
  56. tensorbored/backend/event_processing/__init__.py +0 -0
  57. tensorbored/backend/event_processing/data_ingester.py +276 -0
  58. tensorbored/backend/event_processing/data_provider.py +535 -0
  59. tensorbored/backend/event_processing/directory_loader.py +142 -0
  60. tensorbored/backend/event_processing/directory_watcher.py +272 -0
  61. tensorbored/backend/event_processing/event_accumulator.py +950 -0
  62. tensorbored/backend/event_processing/event_file_inspector.py +463 -0
  63. tensorbored/backend/event_processing/event_file_loader.py +292 -0
  64. tensorbored/backend/event_processing/event_multiplexer.py +521 -0
  65. tensorbored/backend/event_processing/event_util.py +68 -0
  66. tensorbored/backend/event_processing/io_wrapper.py +223 -0
  67. tensorbored/backend/event_processing/plugin_asset_util.py +104 -0
  68. tensorbored/backend/event_processing/plugin_event_accumulator.py +721 -0
  69. tensorbored/backend/event_processing/plugin_event_multiplexer.py +522 -0
  70. tensorbored/backend/event_processing/reservoir.py +266 -0
  71. tensorbored/backend/event_processing/tag_types.py +29 -0
  72. tensorbored/backend/experiment_id.py +71 -0
  73. tensorbored/backend/experimental_plugin.py +51 -0
  74. tensorbored/backend/http_util.py +263 -0
  75. tensorbored/backend/json_util.py +70 -0
  76. tensorbored/backend/path_prefix.py +67 -0
  77. tensorbored/backend/process_graph.py +74 -0
  78. tensorbored/backend/security_validator.py +202 -0
  79. tensorbored/compat/__init__.py +69 -0
  80. tensorbored/compat/proto/__init__.py +0 -0
  81. tensorbored/compat/proto/allocation_description_pb2.py +35 -0
  82. tensorbored/compat/proto/api_def_pb2.py +82 -0
  83. tensorbored/compat/proto/attr_value_pb2.py +80 -0
  84. tensorbored/compat/proto/cluster_pb2.py +58 -0
  85. tensorbored/compat/proto/config_pb2.py +271 -0
  86. tensorbored/compat/proto/coordination_config_pb2.py +45 -0
  87. tensorbored/compat/proto/cost_graph_pb2.py +87 -0
  88. tensorbored/compat/proto/cpp_shape_inference_pb2.py +70 -0
  89. tensorbored/compat/proto/debug_pb2.py +65 -0
  90. tensorbored/compat/proto/event_pb2.py +149 -0
  91. tensorbored/compat/proto/full_type_pb2.py +74 -0
  92. tensorbored/compat/proto/function_pb2.py +157 -0
  93. tensorbored/compat/proto/graph_debug_info_pb2.py +111 -0
  94. tensorbored/compat/proto/graph_pb2.py +41 -0
  95. tensorbored/compat/proto/histogram_pb2.py +39 -0
  96. tensorbored/compat/proto/meta_graph_pb2.py +254 -0
  97. tensorbored/compat/proto/node_def_pb2.py +61 -0
  98. tensorbored/compat/proto/op_def_pb2.py +81 -0
  99. tensorbored/compat/proto/resource_handle_pb2.py +48 -0
  100. tensorbored/compat/proto/rewriter_config_pb2.py +93 -0
  101. tensorbored/compat/proto/rpc_options_pb2.py +35 -0
  102. tensorbored/compat/proto/saved_object_graph_pb2.py +193 -0
  103. tensorbored/compat/proto/saver_pb2.py +38 -0
  104. tensorbored/compat/proto/step_stats_pb2.py +116 -0
  105. tensorbored/compat/proto/struct_pb2.py +144 -0
  106. tensorbored/compat/proto/summary_pb2.py +111 -0
  107. tensorbored/compat/proto/tensor_description_pb2.py +38 -0
  108. tensorbored/compat/proto/tensor_pb2.py +68 -0
  109. tensorbored/compat/proto/tensor_shape_pb2.py +46 -0
  110. tensorbored/compat/proto/tfprof_log_pb2.py +307 -0
  111. tensorbored/compat/proto/trackable_object_graph_pb2.py +90 -0
  112. tensorbored/compat/proto/types_pb2.py +105 -0
  113. tensorbored/compat/proto/variable_pb2.py +62 -0
  114. tensorbored/compat/proto/verifier_config_pb2.py +38 -0
  115. tensorbored/compat/proto/versions_pb2.py +35 -0
  116. tensorbored/compat/tensorflow_stub/__init__.py +38 -0
  117. tensorbored/compat/tensorflow_stub/app.py +124 -0
  118. tensorbored/compat/tensorflow_stub/compat/__init__.py +131 -0
  119. tensorbored/compat/tensorflow_stub/compat/v1/__init__.py +20 -0
  120. tensorbored/compat/tensorflow_stub/dtypes.py +692 -0
  121. tensorbored/compat/tensorflow_stub/error_codes.py +169 -0
  122. tensorbored/compat/tensorflow_stub/errors.py +507 -0
  123. tensorbored/compat/tensorflow_stub/flags.py +124 -0
  124. tensorbored/compat/tensorflow_stub/io/__init__.py +17 -0
  125. tensorbored/compat/tensorflow_stub/io/gfile.py +1011 -0
  126. tensorbored/compat/tensorflow_stub/pywrap_tensorflow.py +285 -0
  127. tensorbored/compat/tensorflow_stub/tensor_shape.py +1035 -0
  128. tensorbored/context.py +129 -0
  129. tensorbored/data/__init__.py +0 -0
  130. tensorbored/data/grpc_provider.py +365 -0
  131. tensorbored/data/ingester.py +46 -0
  132. tensorbored/data/proto/__init__.py +0 -0
  133. tensorbored/data/proto/data_provider_pb2.py +517 -0
  134. tensorbored/data/proto/data_provider_pb2_grpc.py +374 -0
  135. tensorbored/data/provider.py +1365 -0
  136. tensorbored/data/server_ingester.py +301 -0
  137. tensorbored/data_compat.py +159 -0
  138. tensorbored/dataclass_compat.py +224 -0
  139. tensorbored/default.py +124 -0
  140. tensorbored/errors.py +130 -0
  141. tensorbored/lazy.py +99 -0
  142. tensorbored/main.py +48 -0
  143. tensorbored/main_lib.py +62 -0
  144. tensorbored/manager.py +487 -0
  145. tensorbored/notebook.py +441 -0
  146. tensorbored/plugin_util.py +266 -0
  147. tensorbored/plugins/__init__.py +0 -0
  148. tensorbored/plugins/audio/__init__.py +0 -0
  149. tensorbored/plugins/audio/audio_plugin.py +229 -0
  150. tensorbored/plugins/audio/metadata.py +69 -0
  151. tensorbored/plugins/audio/plugin_data_pb2.py +37 -0
  152. tensorbored/plugins/audio/summary.py +230 -0
  153. tensorbored/plugins/audio/summary_v2.py +124 -0
  154. tensorbored/plugins/base_plugin.py +367 -0
  155. tensorbored/plugins/core/__init__.py +0 -0
  156. tensorbored/plugins/core/core_plugin.py +981 -0
  157. tensorbored/plugins/custom_scalar/__init__.py +0 -0
  158. tensorbored/plugins/custom_scalar/custom_scalars_plugin.py +320 -0
  159. tensorbored/plugins/custom_scalar/layout_pb2.py +85 -0
  160. tensorbored/plugins/custom_scalar/metadata.py +35 -0
  161. tensorbored/plugins/custom_scalar/summary.py +79 -0
  162. tensorbored/plugins/debugger_v2/__init__.py +0 -0
  163. tensorbored/plugins/debugger_v2/debug_data_multiplexer.py +631 -0
  164. tensorbored/plugins/debugger_v2/debug_data_provider.py +634 -0
  165. tensorbored/plugins/debugger_v2/debugger_v2_plugin.py +504 -0
  166. tensorbored/plugins/distribution/__init__.py +0 -0
  167. tensorbored/plugins/distribution/compressor.py +158 -0
  168. tensorbored/plugins/distribution/distributions_plugin.py +116 -0
  169. tensorbored/plugins/distribution/metadata.py +19 -0
  170. tensorbored/plugins/graph/__init__.py +0 -0
  171. tensorbored/plugins/graph/graph_util.py +129 -0
  172. tensorbored/plugins/graph/graphs_plugin.py +336 -0
  173. tensorbored/plugins/graph/keras_util.py +328 -0
  174. tensorbored/plugins/graph/metadata.py +42 -0
  175. tensorbored/plugins/histogram/__init__.py +0 -0
  176. tensorbored/plugins/histogram/histograms_plugin.py +144 -0
  177. tensorbored/plugins/histogram/metadata.py +63 -0
  178. tensorbored/plugins/histogram/plugin_data_pb2.py +34 -0
  179. tensorbored/plugins/histogram/summary.py +234 -0
  180. tensorbored/plugins/histogram/summary_v2.py +292 -0
  181. tensorbored/plugins/hparams/__init__.py +14 -0
  182. tensorbored/plugins/hparams/_keras.py +93 -0
  183. tensorbored/plugins/hparams/api.py +130 -0
  184. tensorbored/plugins/hparams/api_pb2.py +208 -0
  185. tensorbored/plugins/hparams/backend_context.py +606 -0
  186. tensorbored/plugins/hparams/download_data.py +158 -0
  187. tensorbored/plugins/hparams/error.py +26 -0
  188. tensorbored/plugins/hparams/get_experiment.py +71 -0
  189. tensorbored/plugins/hparams/hparams_plugin.py +206 -0
  190. tensorbored/plugins/hparams/hparams_util_pb2.py +69 -0
  191. tensorbored/plugins/hparams/json_format_compat.py +38 -0
  192. tensorbored/plugins/hparams/list_metric_evals.py +57 -0
  193. tensorbored/plugins/hparams/list_session_groups.py +1040 -0
  194. tensorbored/plugins/hparams/metadata.py +125 -0
  195. tensorbored/plugins/hparams/metrics.py +41 -0
  196. tensorbored/plugins/hparams/plugin_data_pb2.py +69 -0
  197. tensorbored/plugins/hparams/summary.py +205 -0
  198. tensorbored/plugins/hparams/summary_v2.py +597 -0
  199. tensorbored/plugins/image/__init__.py +0 -0
  200. tensorbored/plugins/image/images_plugin.py +232 -0
  201. tensorbored/plugins/image/metadata.py +65 -0
  202. tensorbored/plugins/image/plugin_data_pb2.py +34 -0
  203. tensorbored/plugins/image/summary.py +159 -0
  204. tensorbored/plugins/image/summary_v2.py +130 -0
  205. tensorbored/plugins/mesh/__init__.py +14 -0
  206. tensorbored/plugins/mesh/mesh_plugin.py +292 -0
  207. tensorbored/plugins/mesh/metadata.py +152 -0
  208. tensorbored/plugins/mesh/plugin_data_pb2.py +37 -0
  209. tensorbored/plugins/mesh/summary.py +251 -0
  210. tensorbored/plugins/mesh/summary_v2.py +214 -0
  211. tensorbored/plugins/metrics/__init__.py +0 -0
  212. tensorbored/plugins/metrics/metadata.py +17 -0
  213. tensorbored/plugins/metrics/metrics_plugin.py +623 -0
  214. tensorbored/plugins/pr_curve/__init__.py +0 -0
  215. tensorbored/plugins/pr_curve/metadata.py +75 -0
  216. tensorbored/plugins/pr_curve/plugin_data_pb2.py +34 -0
  217. tensorbored/plugins/pr_curve/pr_curves_plugin.py +241 -0
  218. tensorbored/plugins/pr_curve/summary.py +574 -0
  219. tensorbored/plugins/profile_redirect/__init__.py +0 -0
  220. tensorbored/plugins/profile_redirect/profile_redirect_plugin.py +49 -0
  221. tensorbored/plugins/projector/__init__.py +67 -0
  222. tensorbored/plugins/projector/metadata.py +26 -0
  223. tensorbored/plugins/projector/projector_config_pb2.py +54 -0
  224. tensorbored/plugins/projector/projector_plugin.py +795 -0
  225. tensorbored/plugins/projector/tf_projector_plugin/index.js +32 -0
  226. tensorbored/plugins/projector/tf_projector_plugin/projector_binary.html +524 -0
  227. tensorbored/plugins/projector/tf_projector_plugin/projector_binary.js +15536 -0
  228. tensorbored/plugins/scalar/__init__.py +0 -0
  229. tensorbored/plugins/scalar/metadata.py +60 -0
  230. tensorbored/plugins/scalar/plugin_data_pb2.py +34 -0
  231. tensorbored/plugins/scalar/scalars_plugin.py +181 -0
  232. tensorbored/plugins/scalar/summary.py +109 -0
  233. tensorbored/plugins/scalar/summary_v2.py +124 -0
  234. tensorbored/plugins/text/__init__.py +0 -0
  235. tensorbored/plugins/text/metadata.py +62 -0
  236. tensorbored/plugins/text/plugin_data_pb2.py +34 -0
  237. tensorbored/plugins/text/summary.py +114 -0
  238. tensorbored/plugins/text/summary_v2.py +124 -0
  239. tensorbored/plugins/text/text_plugin.py +288 -0
  240. tensorbored/plugins/wit_redirect/__init__.py +0 -0
  241. tensorbored/plugins/wit_redirect/wit_redirect_plugin.py +49 -0
  242. tensorbored/program.py +910 -0
  243. tensorbored/summary/__init__.py +35 -0
  244. tensorbored/summary/_output.py +124 -0
  245. tensorbored/summary/_tf/__init__.py +14 -0
  246. tensorbored/summary/_tf/summary/__init__.py +178 -0
  247. tensorbored/summary/_writer.py +105 -0
  248. tensorbored/summary/v1.py +51 -0
  249. tensorbored/summary/v2.py +25 -0
  250. tensorbored/summary/writer/__init__.py +13 -0
  251. tensorbored/summary/writer/event_file_writer.py +291 -0
  252. tensorbored/summary/writer/record_writer.py +50 -0
  253. tensorbored/util/__init__.py +0 -0
  254. tensorbored/util/encoder.py +116 -0
  255. tensorbored/util/grpc_util.py +311 -0
  256. tensorbored/util/img_mime_type_detector.py +40 -0
  257. tensorbored/util/io_util.py +20 -0
  258. tensorbored/util/lazy_tensor_creator.py +110 -0
  259. tensorbored/util/op_evaluator.py +104 -0
  260. tensorbored/util/platform_util.py +20 -0
  261. tensorbored/util/tb_logging.py +24 -0
  262. tensorbored/util/tensor_util.py +617 -0
  263. tensorbored/util/timing.py +122 -0
  264. tensorbored/version.py +21 -0
  265. tensorbored/webfiles.zip +0 -0
  266. tensorbored-2.21.0rc1769983804.dist-info/METADATA +49 -0
  267. tensorbored-2.21.0rc1769983804.dist-info/RECORD +271 -0
  268. tensorbored-2.21.0rc1769983804.dist-info/WHEEL +5 -0
  269. tensorbored-2.21.0rc1769983804.dist-info/entry_points.txt +6 -0
  270. tensorbored-2.21.0rc1769983804.dist-info/licenses/LICENSE +739 -0
  271. tensorbored-2.21.0rc1769983804.dist-info/top_level.txt +1 -0
@@ -0,0 +1,638 @@
1
+ from itertools import chain
2
+ import re
3
+ import warnings
4
+
5
+ from xml.sax.saxutils import unescape
6
+
7
+ from tensorbored._vendor.bleach import html5lib_shim
8
+ from tensorbored._vendor.bleach import parse_shim
9
+
10
+
11
#: Tags that are kept by default; anything else is escaped or stripped
ALLOWED_TAGS = frozenset(
    {
        "a",
        "abbr",
        "acronym",
        "b",
        "blockquote",
        "code",
        "em",
        "i",
        "li",
        "ol",
        "strong",
        "ul",
    }
)


#: Default per-tag allow-list of attribute names
ALLOWED_ATTRIBUTES = {
    "a": ["href", "title"],
    "abbr": ["title"],
    "acronym": ["title"],
}

#: Default URL schemes accepted in URI-valued attributes
ALLOWED_PROTOCOLS = frozenset({"http", "https", "mailto"})

#: Invisible characters--code points 0 through 31, leaving out 9 (tab),
#: 10 (lf), and 13 (cr)
INVISIBLE_CHARACTERS = "".join(
    map(chr, chain(range(0, 9), range(11, 13), range(14, 32)))
)

#: Compiled character class matching any single invisible character
INVISIBLE_CHARACTERS_RE = re.compile(f"[{INVISIBLE_CHARACTERS}]", re.UNICODE)

#: Replacement for invisible characters. It is handed to ``re.sub``, so it can
#: be a character, a string, or a function that takes a Python re matchobj
INVISIBLE_REPLACEMENT_CHAR = "?"
51
+
52
+
53
class NoCssSanitizerWarning(UserWarning):
    """Warning category for a ``style`` attribute allowed without a CSS sanitizer."""
55
+
56
+
57
class Cleaner:
    """Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from tensorbored._vendor.bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!

    """

    def __init__(
        self,
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        protocols=ALLOWED_PROTOCOLS,
        strip=False,
        strip_comments=True,
        filters=None,
        css_sanitizer=None,
    ):
        """Initializes a Cleaner

        :arg set tags: set of allowed tags; defaults to ``ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or
            dict; defaults to ``ALLOWED_ATTRIBUTES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed
            content through; each is instantiated with ``source=<stream>``

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method
            for sanitizing style attribute values and style text; defaults to
            None. When it is None and ``"style"`` appears among the allowed
            attribute names, a ``NoCssSanitizerWarning`` is emitted.

        """
        self.tags = tags
        self.attributes = attributes
        self.protocols = protocols
        self.strip = strip
        self.strip_comments = strip_comments
        self.filters = filters or []
        self.css_sanitizer = css_sanitizer

        self.parser = html5lib_shim.BleachHTMLParser(
            tags=self.tags,
            strip=self.strip,
            consume_entities=False,
            namespaceHTMLElements=False,
        )
        self.walker = html5lib_shim.getTreeWalker("etree")
        self.serializer = html5lib_shim.BleachHTMLSerializer(
            quote_attr_values="always",
            omit_optional_tags=False,
            escape_lt_in_attrs=True,
            # We want to leave entities as they are without escaping or
            # resolving or expanding
            resolve_entities=False,
            # Bleach has its own sanitizer, so don't use the html5lib one
            sanitize=False,
            # clean preserves attr order
            alphabetical_attributes=False,
        )

        if css_sanitizer is None:
            # FIXME(willkg): this doesn't handle when attributes or an
            # attributes value is a callable
            attributes_values = []
            if isinstance(attributes, list):
                attributes_values = attributes

            elif isinstance(attributes, dict):
                for values in attributes.values():
                    # The attribute filter only needs ``attr in values``, so
                    # sets and frozensets are valid per-tag values too; include
                    # them here so an allowed "style" is never missed.
                    if isinstance(values, (list, tuple, set, frozenset)):
                        attributes_values.extend(values)

            if "style" in attributes_values:
                warnings.warn(
                    "'style' attribute specified, but css_sanitizer not set.",
                    category=NoCssSanitizerWarning,
                )

    def clean(self, text):
        """Cleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode; the empty string for empty input

        :raises TypeError: if ``text`` is not a text type

        """
        if not isinstance(text, str):
            message = (
                f"argument cannot be of {text.__class__.__name__!r} type, "
                + "must be of text type"
            )
            raise TypeError(message)

        if not text:
            return ""

        dom = self.parser.parseFragment(text)
        filtered = BleachSanitizerFilter(
            source=self.walker(dom),
            allowed_tags=self.tags,
            attributes=self.attributes,
            strip_disallowed_tags=self.strip,
            strip_html_comments=self.strip_comments,
            css_sanitizer=self.css_sanitizer,
            allowed_protocols=self.protocols,
        )

        # Apply any filters after the BleachSanitizerFilter
        for filter_class in self.filters:
            filtered = filter_class(source=filtered)

        return self.serializer.render(filtered)
207
+
208
+
209
def attribute_filter_factory(attributes):
    """Build the attribute filter function for an ``attributes`` value

    The returned function is called as ``filter(tag, attr, value)`` and
    answers whether that attribute should be kept.

    :arg attributes: one of

        * a callable -- returned unchanged
        * a dict mapping a tag name (or ``"*"`` for any tag) to either a
          callable taking ``(tag, attr, value)`` or a container of allowed
          attribute names
        * a list of attribute names allowed on every tag

    :returns: the filter function

    :raises ValueError: if ``attributes`` is none of the above

    """
    if callable(attributes):
        return attributes

    if isinstance(attributes, list):

        def _allowed_everywhere(tag, attr, value):
            return attr in attributes

        return _allowed_everywhere

    if not isinstance(attributes, dict):
        raise ValueError("attributes needs to be a callable, a list or a dict")

    def _allowed_for_tag(tag, attr, value):
        # The tag-specific rule is consulted first. A callable rule decides
        # outright; a container rule can only approve -- a miss falls through
        # to the wildcard rule.
        if tag in attributes:
            tag_rule = attributes[tag]
            if callable(tag_rule):
                return tag_rule(tag, attr, value)
            if attr in tag_rule:
                return True

        if "*" not in attributes:
            return False

        wildcard_rule = attributes["*"]
        if callable(wildcard_rule):
            return wildcard_rule(tag, attr, value)
        return attr in wildcard_rule

    return _allowed_for_tag
250
+
251
+
252
+ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
253
+ """html5lib Filter that sanitizes text
254
+
255
+ This filter can be used anywhere html5lib filters can be used.
256
+
257
+ """
258
+
259
def __init__(
    self,
    source,
    allowed_tags=ALLOWED_TAGS,
    attributes=ALLOWED_ATTRIBUTES,
    allowed_protocols=ALLOWED_PROTOCOLS,
    attr_val_is_uri=html5lib_shim.attr_val_is_uri,
    svg_attr_val_allows_ref=html5lib_shim.svg_attr_val_allows_ref,
    svg_allow_local_href=html5lib_shim.svg_allow_local_href,
    strip_disallowed_tags=False,
    strip_html_comments=True,
    css_sanitizer=None,
):
    """Creates a BleachSanitizerFilter instance

    :arg source: html5lib TreeWalker stream as an html5lib TreeWalker

    :arg set allowed_tags: set of allowed tags; defaults to
        ``ALLOWED_TAGS``; stored as a frozenset

    :arg dict attributes: allowed attributes; can be a callable, list or
        dict; defaults to ``ALLOWED_ATTRIBUTES``; converted into a filter
        function by ``attribute_filter_factory``

    :arg list allowed_protocols: allowed list of protocols for links;
        defaults to ``ALLOWED_PROTOCOLS``; stored as a frozenset

    :arg attr_val_is_uri: set of attributes that have URI values

    :arg svg_attr_val_allows_ref: set of SVG attributes that can have
        references

    :arg svg_allow_local_href: set of SVG elements that can have local
        hrefs

    :arg bool strip_disallowed_tags: whether or not to strip disallowed
        tags

    :arg bool strip_html_comments: whether or not to strip HTML comments

    :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method
        for sanitizing style attribute values and style text; defaults to
        None

    """
    # NOTE(willkg): html5lib_shim.Filter is the superclass of
    # html5lib.filters.sanitizer.Filter. Its __init__ is invoked directly,
    # bypassing html5lib.filters.sanitizer.Filter.__init__, because that one
    # does things we don't need and kicks up the deprecation warning for
    # using Sanitizer.
    html5lib_shim.Filter.__init__(self, source)

    # What is allowed through
    self.allowed_tags = frozenset(allowed_tags)
    self.allowed_protocols = frozenset(allowed_protocols)
    self.attr_filter = attribute_filter_factory(attributes)

    # How disallowed or ancillary content is treated
    self.strip_disallowed_tags = strip_disallowed_tags
    self.strip_html_comments = strip_html_comments
    self.css_sanitizer = css_sanitizer

    # Attribute/element tables consulted while sanitizing attribute values
    self.attr_val_is_uri = attr_val_is_uri
    self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
    self.svg_allow_local_href = svg_allow_local_href
320
+
321
def sanitize_stream(self, token_iterator):
    """Yield the sanitized form of every token in ``token_iterator``

    Each token goes through ``self.sanitize_token``. A falsy result drops the
    token, a list result is flattened into the output, and any other result
    is yielded as a single token.

    :arg token_iterator: iterable of html5lib tokens

    :returns: generator of sanitized tokens

    """
    for raw_token in token_iterator:
        outcome = self.sanitize_token(raw_token)

        if isinstance(outcome, list):
            # An empty list contributes nothing, same as a dropped token
            yield from outcome
        elif outcome:
            yield outcome
332
+
333
def merge_characters(self, token_iterator):
    """Merge consecutive Characters tokens in a stream

    Every run of adjacent Characters tokens is replaced by one new
    Characters token whose data is the concatenation of the run. All other
    tokens pass through unchanged and in order.

    No token is emitted for an empty run, so a stream that does not end in
    Characters tokens (or is empty) gets no trailing empty Characters token.

    :arg token_iterator: iterable of html5lib tokens (dicts with a "type"
        key; Characters tokens also carry "data")

    :returns: generator of tokens

    """
    # Data strings of the Characters run currently being collected
    pending = []

    for token in token_iterator:
        if token["type"] == "Characters":
            pending.append(token["data"])
            continue

        # A non-Characters token ends the current run: flush it first
        if pending:
            yield {"data": "".join(pending), "type": "Characters"}
            pending = []

        yield token

    # Flush a run that reaches the end of the stream; skip when nothing is
    # buffered so we never invent a token that was not in the input
    if pending:
        yield {"data": "".join(pending), "type": "Characters"}
365
+
366
def __iter__(self):
    """Iterate over the source tokens, sanitized and with Characters merged

    The tokens produced by ``html5lib_shim.Filter.__iter__`` are passed
    through ``sanitize_stream`` and then ``merge_characters``.

    """
    source_tokens = html5lib_shim.Filter.__iter__(self)
    sanitized_tokens = self.sanitize_stream(source_tokens)
    return self.merge_characters(sanitized_tokens)
370
+
371
def sanitize_token(self, token):
    """Sanitize a token either by HTML-encoding or dropping.

    Dispatches on the token type:

    * ``StartTag`` / ``EndTag`` / ``EmptyTag`` -- allowed tags go to
      ``allow_token``; disallowed tags are dropped when
      ``strip_disallowed_tags`` is set and otherwise handed to
      ``disallowed_token``.
    * ``Comment`` -- dropped when ``strip_html_comments`` is set, otherwise
      its data is escaped in place and the token is kept.
    * ``Characters`` -- handed to ``sanitize_characters``.
    * anything else -- returned untouched.

    Unlike sanitizer.Filter, allowed attributes can be a dict of {'tag':
    ['attribute', 'names'], 'tag': callable}, where the callable is invoked
    with the tag name, attribute name and attribute value and should return
    true or false.

    :arg dict token: token to sanitize

    :returns: token, list of tokens, or None when the token is dropped

    """
    kind = token["type"]

    if kind in ("StartTag", "EndTag", "EmptyTag"):
        if token["name"] in self.allowed_tags:
            return self.allow_token(token)
        if self.strip_disallowed_tags:
            return None
        return self.disallowed_token(token)

    if kind == "Comment":
        if self.strip_html_comments:
            return None
        # Escape the comment body; the extra entities map additionally
        # encodes double and single quotes.
        token["data"] = html5lib_shim.escape(
            token["data"], entities={'"': "&quot;", "'": "&#x27;"}
        )
        return token

    if kind == "Characters":
        return self.sanitize_characters(token)

    return token
413
+
414
def sanitize_characters(self, token):
    """Handles Characters tokens

    Invisible characters in the token data are replaced first. Then, because
    our overridden tokenizer doesn't do anything with entities, the
    serializer would convert every ``&`` in a Characters token to ``&amp;``.
    To avoid that, entities are pulled out of the data here and turned into
    Entity tokens so the serializer will let them be.

    :arg token: the Characters token to work on

    :returns: the token itself when its data is empty or contains no ``&``
        (with its data updated in place in the latter case); otherwise a
        list of Characters and Entity tokens

    """
    text = token.get("data", "")
    if not text:
        return token

    text = INVISIBLE_CHARACTERS_RE.sub(INVISIBLE_REPLACEMENT_CHAR, text)
    token["data"] = text

    # Without a & there can be no entity, so the token is done
    if "&" not in text:
        return token

    pieces = []

    # Walk the chunks that may start with an entity and re-tokenize each
    for chunk in html5lib_shim.next_possible_entity(text):
        if not chunk:
            continue

        entity = (
            html5lib_shim.match_entity(chunk) if chunk.startswith("&") else None
        )
        if entity is None:
            # Plain text, or a & that does not begin a real entity
            pieces.append({"type": "Characters", "data": chunk})
            continue

        if entity == "amp":
            # LinkifyFilter can't match urls across token boundaries
            # which is problematic with &amp; since that shows up in
            # querystrings all the time. This special-cases &amp;
            # and converts it to a & and sticks it in as a
            # Characters token. It'll get merged with surrounding
            # tokens in the BleachSanitizerfilter.__iter__ and
            # escaped in the serializer.
            pieces.append({"type": "Characters", "data": "&"})
        else:
            pieces.append({"type": "Entity", "name": entity})

        # Skip the entity name plus 2 characters--the leading & and the
        # trailing ;
        tail = chunk[len(entity) + 2 :]
        if tail:
            pieces.append({"type": "Characters", "data": tail})

    return pieces
474
+
475
def sanitize_uri_value(self, value, allowed_protocols):
    """Checks a uri value to see if it's allowed

    The check runs against a normalized copy of the value (entities
    converted, backticks/whitespace/control characters and REPLACEMENT
    characters removed, lowercased). The normalized copy is only used for
    matching; what gets returned is always the original ``value``.

    :arg value: the uri value to sanitize
    :arg allowed_protocols: list of allowed protocols

    :returns: ``value`` unchanged if it is allowed, otherwise None

    """
    # NOTE(willkg): The normalized form is easier to match and verify, but it
    # is vastly different than the original value, so never return it.

    # Convert all character entities in the value
    candidate = html5lib_shim.convert_entities(value)

    # Nix backtick, space characters, and control characters
    candidate = re.sub(r"[`\000-\040\177-\240\s]+", "", candidate)

    # Remove REPLACEMENT characters, then lowercase--this breaks the value,
    # but makes it easier to match against
    candidate = candidate.replace("\ufffd", "").lower()

    try:
        parsed = parse_shim.urlparse(candidate)
    except ValueError:
        # URI is impossible to parse, therefore it's not allowed
        return None

    scheme = parsed.scheme
    if scheme:
        # urlparse found a scheme: it alone decides
        return value if scheme in allowed_protocols else None

    # Allow uris that are just an anchor
    if candidate.startswith("#"):
        return value

    # Handle protocols that urlparse doesn't recognize like "myprotocol"
    prefix, colon, _rest = candidate.partition(":")
    if colon and prefix in allowed_protocols:
        return value

    # If there's no protocol/scheme specified, then assume it's "http" or
    # "https" and see if that's allowed
    if "http" in allowed_protocols or "https" in allowed_protocols:
        return value

    return None
532
+
533
def allow_token(self, token):
    """Filter the attributes of a tag that is itself allowed

    Tokens without a ``"data"`` entry are returned untouched. Otherwise
    ``token["data"]`` is replaced with a new dict holding only the
    attributes that survive every check below, with values fixed up where
    needed. The same token object is returned.
    """
    if "data" not in token:
        return token

    kept = {}
    for attr_key, attr_val in token["data"].items():
        # Keys are (namespace, name) pairs; the filter callback receives
        # the plain attribute name, not the namespaced one.
        _namespace, local_name = attr_key

        # Not explicitly allowed -> drop
        if not self.attr_filter(token["name"], local_name, attr_val):
            continue

        # uri-valued attribute: drop it when the protocol is disallowed
        if attr_key in self.attr_val_is_uri:
            checked = self.sanitize_uri_value(attr_val, self.allowed_protocols)
            if checked is None:
                continue
            attr_val = checked

        # svg attribute that may reference other resources: blank out
        # non-local url(...) references and drop the attribute if nothing
        # is left. The surviving value is the unescaped one, since it is
        # an iri.
        if attr_key in self.svg_attr_val_allows_ref:
            local_only = re.sub(
                r"url\s*\(\s*[^#\s][^)]+?\)", " ", unescape(attr_val)
            ).strip()
            if not local_only:
                continue
            attr_val = local_only

        # svg elements restricted to local hrefs: drop href / xlink:href
        # whose value does not start with "#"
        if (
            (None, token["name"]) in self.svg_allow_local_href
            and attr_key
            in (
                (None, "href"),
                (html5lib_shim.namespaces["xlink"], "href"),
            )
            and re.search(r"^\s*[^#\s]", attr_val)
        ):
            continue

        # style attribute: run it through the css sanitizer.
        # FIXME(willkg): if style is allowed but no css_sanitizer was set
        # up, that is probably a mistake and should raise an error; for
        # now the value is emptied instead.
        if attr_key == (None, "style"):
            attr_val = (
                self.css_sanitizer.sanitize_css(attr_val)
                if self.css_sanitizer
                else ""
            )

        # Survived everything, so keep it
        kept[attr_key] = attr_val

    token["data"] = kept
    return token
601
+
602
def disallowed_token(self, token):
    """Turn a disallowed tag token into a Characters token

    The tag is rendered back to its textual form (``<name attr="v">``,
    ``</name>`` or ``<name/>``) and stored in ``token["data"]``; the token
    type becomes ``"Characters"`` and the ``"name"`` key is removed. The
    same token object is mutated and returned.
    """
    kind = token["type"]

    if kind == "EndTag":
        markup = f"</{token['name']}>"

    elif not token["data"]:
        # No attributes to render
        markup = f"<{token['name']}>"

    else:
        assert kind in ("StartTag", "EmptyTag")
        rendered = []
        for (namespace, local), value in token["data"].items():
            # A namespace without a name: swap them so there is a valid
            # name to emit.
            if namespace and not local:
                namespace, local = local, namespace

            # Prefix the name only for namespaces with a known prefix;
            # unknown namespaces are dropped.
            if namespace is None or namespace not in html5lib_shim.prefixes:
                qualified = local
            else:
                qualified = f"{html5lib_shim.prefixes[namespace]}:{local}"

            # NOTE(willkg): HTMLSerializer escapes attribute values
            # already, so escaping here too would double-escape.
            rendered.append(f' {qualified}="{value}"')
        markup = f"<{token['name']}{''.join(rendered)}>"

    if token.get("selfClosing"):
        # Replace the trailing ">" with "/>"
        markup = f"{markup[:-1]}/>"

    token["data"] = markup
    token["type"] = "Characters"
    del token["name"]
    return token
@@ -0,0 +1,19 @@
1
+ """
2
+ Replacement module for what html5lib uses six for.
3
+ """
4
+
5
import http.client
import operator

# Import the submodule explicitly: a bare ``import urllib`` does not load
# ``urllib.parse``; it would only be reachable here as a side effect of
# ``http.client`` importing it. This statement also binds the name
# ``urllib`` to the package itself.
import urllib.parse


# Constants mirroring the names six provides; Python 3 is the only target.
PY3 = True
binary_type = bytes
string_types = (str,)
text_type = str
unichr = chr
# Callable returning ``obj.keys()`` for whatever mapping it is given.
viewkeys = operator.methodcaller("keys")

# Re-exports under the names used by ``six.moves``.
http_client = http.client
# NOTE(review): only ``urllib.parse`` is guaranteed to be loaded through
# this re-export; callers reaching for other urllib submodules should
# import them themselves -- confirm against callers.
urllib = urllib
urllib_parse = urllib.parse