tensorbored 2.21.0rc1769983804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271) hide show
  1. tensorbored/__init__.py +112 -0
  2. tensorbored/_vendor/__init__.py +0 -0
  3. tensorbored/_vendor/bleach/__init__.py +125 -0
  4. tensorbored/_vendor/bleach/_vendor/__init__.py +0 -0
  5. tensorbored/_vendor/bleach/_vendor/html5lib/__init__.py +35 -0
  6. tensorbored/_vendor/bleach/_vendor/html5lib/_ihatexml.py +289 -0
  7. tensorbored/_vendor/bleach/_vendor/html5lib/_inputstream.py +918 -0
  8. tensorbored/_vendor/bleach/_vendor/html5lib/_tokenizer.py +1735 -0
  9. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/__init__.py +5 -0
  10. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/_base.py +40 -0
  11. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/py.py +67 -0
  12. tensorbored/_vendor/bleach/_vendor/html5lib/_utils.py +159 -0
  13. tensorbored/_vendor/bleach/_vendor/html5lib/constants.py +2946 -0
  14. tensorbored/_vendor/bleach/_vendor/html5lib/filters/__init__.py +0 -0
  15. tensorbored/_vendor/bleach/_vendor/html5lib/filters/alphabeticalattributes.py +29 -0
  16. tensorbored/_vendor/bleach/_vendor/html5lib/filters/base.py +12 -0
  17. tensorbored/_vendor/bleach/_vendor/html5lib/filters/inject_meta_charset.py +73 -0
  18. tensorbored/_vendor/bleach/_vendor/html5lib/filters/lint.py +93 -0
  19. tensorbored/_vendor/bleach/_vendor/html5lib/filters/optionaltags.py +207 -0
  20. tensorbored/_vendor/bleach/_vendor/html5lib/filters/sanitizer.py +916 -0
  21. tensorbored/_vendor/bleach/_vendor/html5lib/filters/whitespace.py +38 -0
  22. tensorbored/_vendor/bleach/_vendor/html5lib/html5parser.py +2795 -0
  23. tensorbored/_vendor/bleach/_vendor/html5lib/serializer.py +409 -0
  24. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/__init__.py +30 -0
  25. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/genshi.py +54 -0
  26. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/sax.py +50 -0
  27. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/__init__.py +88 -0
  28. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/base.py +417 -0
  29. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/dom.py +239 -0
  30. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree.py +343 -0
  31. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree_lxml.py +392 -0
  32. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/__init__.py +154 -0
  33. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/base.py +252 -0
  34. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/dom.py +43 -0
  35. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree.py +131 -0
  36. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree_lxml.py +215 -0
  37. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/genshi.py +69 -0
  38. tensorbored/_vendor/bleach/_vendor/parse.py +1078 -0
  39. tensorbored/_vendor/bleach/callbacks.py +32 -0
  40. tensorbored/_vendor/bleach/html5lib_shim.py +757 -0
  41. tensorbored/_vendor/bleach/linkifier.py +633 -0
  42. tensorbored/_vendor/bleach/parse_shim.py +1 -0
  43. tensorbored/_vendor/bleach/sanitizer.py +638 -0
  44. tensorbored/_vendor/bleach/six_shim.py +19 -0
  45. tensorbored/_vendor/webencodings/__init__.py +342 -0
  46. tensorbored/_vendor/webencodings/labels.py +231 -0
  47. tensorbored/_vendor/webencodings/mklabels.py +59 -0
  48. tensorbored/_vendor/webencodings/x_user_defined.py +325 -0
  49. tensorbored/assets.py +36 -0
  50. tensorbored/auth.py +102 -0
  51. tensorbored/backend/__init__.py +0 -0
  52. tensorbored/backend/application.py +604 -0
  53. tensorbored/backend/auth_context_middleware.py +38 -0
  54. tensorbored/backend/client_feature_flags.py +113 -0
  55. tensorbored/backend/empty_path_redirect.py +46 -0
  56. tensorbored/backend/event_processing/__init__.py +0 -0
  57. tensorbored/backend/event_processing/data_ingester.py +276 -0
  58. tensorbored/backend/event_processing/data_provider.py +535 -0
  59. tensorbored/backend/event_processing/directory_loader.py +142 -0
  60. tensorbored/backend/event_processing/directory_watcher.py +272 -0
  61. tensorbored/backend/event_processing/event_accumulator.py +950 -0
  62. tensorbored/backend/event_processing/event_file_inspector.py +463 -0
  63. tensorbored/backend/event_processing/event_file_loader.py +292 -0
  64. tensorbored/backend/event_processing/event_multiplexer.py +521 -0
  65. tensorbored/backend/event_processing/event_util.py +68 -0
  66. tensorbored/backend/event_processing/io_wrapper.py +223 -0
  67. tensorbored/backend/event_processing/plugin_asset_util.py +104 -0
  68. tensorbored/backend/event_processing/plugin_event_accumulator.py +721 -0
  69. tensorbored/backend/event_processing/plugin_event_multiplexer.py +522 -0
  70. tensorbored/backend/event_processing/reservoir.py +266 -0
  71. tensorbored/backend/event_processing/tag_types.py +29 -0
  72. tensorbored/backend/experiment_id.py +71 -0
  73. tensorbored/backend/experimental_plugin.py +51 -0
  74. tensorbored/backend/http_util.py +263 -0
  75. tensorbored/backend/json_util.py +70 -0
  76. tensorbored/backend/path_prefix.py +67 -0
  77. tensorbored/backend/process_graph.py +74 -0
  78. tensorbored/backend/security_validator.py +202 -0
  79. tensorbored/compat/__init__.py +69 -0
  80. tensorbored/compat/proto/__init__.py +0 -0
  81. tensorbored/compat/proto/allocation_description_pb2.py +35 -0
  82. tensorbored/compat/proto/api_def_pb2.py +82 -0
  83. tensorbored/compat/proto/attr_value_pb2.py +80 -0
  84. tensorbored/compat/proto/cluster_pb2.py +58 -0
  85. tensorbored/compat/proto/config_pb2.py +271 -0
  86. tensorbored/compat/proto/coordination_config_pb2.py +45 -0
  87. tensorbored/compat/proto/cost_graph_pb2.py +87 -0
  88. tensorbored/compat/proto/cpp_shape_inference_pb2.py +70 -0
  89. tensorbored/compat/proto/debug_pb2.py +65 -0
  90. tensorbored/compat/proto/event_pb2.py +149 -0
  91. tensorbored/compat/proto/full_type_pb2.py +74 -0
  92. tensorbored/compat/proto/function_pb2.py +157 -0
  93. tensorbored/compat/proto/graph_debug_info_pb2.py +111 -0
  94. tensorbored/compat/proto/graph_pb2.py +41 -0
  95. tensorbored/compat/proto/histogram_pb2.py +39 -0
  96. tensorbored/compat/proto/meta_graph_pb2.py +254 -0
  97. tensorbored/compat/proto/node_def_pb2.py +61 -0
  98. tensorbored/compat/proto/op_def_pb2.py +81 -0
  99. tensorbored/compat/proto/resource_handle_pb2.py +48 -0
  100. tensorbored/compat/proto/rewriter_config_pb2.py +93 -0
  101. tensorbored/compat/proto/rpc_options_pb2.py +35 -0
  102. tensorbored/compat/proto/saved_object_graph_pb2.py +193 -0
  103. tensorbored/compat/proto/saver_pb2.py +38 -0
  104. tensorbored/compat/proto/step_stats_pb2.py +116 -0
  105. tensorbored/compat/proto/struct_pb2.py +144 -0
  106. tensorbored/compat/proto/summary_pb2.py +111 -0
  107. tensorbored/compat/proto/tensor_description_pb2.py +38 -0
  108. tensorbored/compat/proto/tensor_pb2.py +68 -0
  109. tensorbored/compat/proto/tensor_shape_pb2.py +46 -0
  110. tensorbored/compat/proto/tfprof_log_pb2.py +307 -0
  111. tensorbored/compat/proto/trackable_object_graph_pb2.py +90 -0
  112. tensorbored/compat/proto/types_pb2.py +105 -0
  113. tensorbored/compat/proto/variable_pb2.py +62 -0
  114. tensorbored/compat/proto/verifier_config_pb2.py +38 -0
  115. tensorbored/compat/proto/versions_pb2.py +35 -0
  116. tensorbored/compat/tensorflow_stub/__init__.py +38 -0
  117. tensorbored/compat/tensorflow_stub/app.py +124 -0
  118. tensorbored/compat/tensorflow_stub/compat/__init__.py +131 -0
  119. tensorbored/compat/tensorflow_stub/compat/v1/__init__.py +20 -0
  120. tensorbored/compat/tensorflow_stub/dtypes.py +692 -0
  121. tensorbored/compat/tensorflow_stub/error_codes.py +169 -0
  122. tensorbored/compat/tensorflow_stub/errors.py +507 -0
  123. tensorbored/compat/tensorflow_stub/flags.py +124 -0
  124. tensorbored/compat/tensorflow_stub/io/__init__.py +17 -0
  125. tensorbored/compat/tensorflow_stub/io/gfile.py +1011 -0
  126. tensorbored/compat/tensorflow_stub/pywrap_tensorflow.py +285 -0
  127. tensorbored/compat/tensorflow_stub/tensor_shape.py +1035 -0
  128. tensorbored/context.py +129 -0
  129. tensorbored/data/__init__.py +0 -0
  130. tensorbored/data/grpc_provider.py +365 -0
  131. tensorbored/data/ingester.py +46 -0
  132. tensorbored/data/proto/__init__.py +0 -0
  133. tensorbored/data/proto/data_provider_pb2.py +517 -0
  134. tensorbored/data/proto/data_provider_pb2_grpc.py +374 -0
  135. tensorbored/data/provider.py +1365 -0
  136. tensorbored/data/server_ingester.py +301 -0
  137. tensorbored/data_compat.py +159 -0
  138. tensorbored/dataclass_compat.py +224 -0
  139. tensorbored/default.py +124 -0
  140. tensorbored/errors.py +130 -0
  141. tensorbored/lazy.py +99 -0
  142. tensorbored/main.py +48 -0
  143. tensorbored/main_lib.py +62 -0
  144. tensorbored/manager.py +487 -0
  145. tensorbored/notebook.py +441 -0
  146. tensorbored/plugin_util.py +266 -0
  147. tensorbored/plugins/__init__.py +0 -0
  148. tensorbored/plugins/audio/__init__.py +0 -0
  149. tensorbored/plugins/audio/audio_plugin.py +229 -0
  150. tensorbored/plugins/audio/metadata.py +69 -0
  151. tensorbored/plugins/audio/plugin_data_pb2.py +37 -0
  152. tensorbored/plugins/audio/summary.py +230 -0
  153. tensorbored/plugins/audio/summary_v2.py +124 -0
  154. tensorbored/plugins/base_plugin.py +367 -0
  155. tensorbored/plugins/core/__init__.py +0 -0
  156. tensorbored/plugins/core/core_plugin.py +981 -0
  157. tensorbored/plugins/custom_scalar/__init__.py +0 -0
  158. tensorbored/plugins/custom_scalar/custom_scalars_plugin.py +320 -0
  159. tensorbored/plugins/custom_scalar/layout_pb2.py +85 -0
  160. tensorbored/plugins/custom_scalar/metadata.py +35 -0
  161. tensorbored/plugins/custom_scalar/summary.py +79 -0
  162. tensorbored/plugins/debugger_v2/__init__.py +0 -0
  163. tensorbored/plugins/debugger_v2/debug_data_multiplexer.py +631 -0
  164. tensorbored/plugins/debugger_v2/debug_data_provider.py +634 -0
  165. tensorbored/plugins/debugger_v2/debugger_v2_plugin.py +504 -0
  166. tensorbored/plugins/distribution/__init__.py +0 -0
  167. tensorbored/plugins/distribution/compressor.py +158 -0
  168. tensorbored/plugins/distribution/distributions_plugin.py +116 -0
  169. tensorbored/plugins/distribution/metadata.py +19 -0
  170. tensorbored/plugins/graph/__init__.py +0 -0
  171. tensorbored/plugins/graph/graph_util.py +129 -0
  172. tensorbored/plugins/graph/graphs_plugin.py +336 -0
  173. tensorbored/plugins/graph/keras_util.py +328 -0
  174. tensorbored/plugins/graph/metadata.py +42 -0
  175. tensorbored/plugins/histogram/__init__.py +0 -0
  176. tensorbored/plugins/histogram/histograms_plugin.py +144 -0
  177. tensorbored/plugins/histogram/metadata.py +63 -0
  178. tensorbored/plugins/histogram/plugin_data_pb2.py +34 -0
  179. tensorbored/plugins/histogram/summary.py +234 -0
  180. tensorbored/plugins/histogram/summary_v2.py +292 -0
  181. tensorbored/plugins/hparams/__init__.py +14 -0
  182. tensorbored/plugins/hparams/_keras.py +93 -0
  183. tensorbored/plugins/hparams/api.py +130 -0
  184. tensorbored/plugins/hparams/api_pb2.py +208 -0
  185. tensorbored/plugins/hparams/backend_context.py +606 -0
  186. tensorbored/plugins/hparams/download_data.py +158 -0
  187. tensorbored/plugins/hparams/error.py +26 -0
  188. tensorbored/plugins/hparams/get_experiment.py +71 -0
  189. tensorbored/plugins/hparams/hparams_plugin.py +206 -0
  190. tensorbored/plugins/hparams/hparams_util_pb2.py +69 -0
  191. tensorbored/plugins/hparams/json_format_compat.py +38 -0
  192. tensorbored/plugins/hparams/list_metric_evals.py +57 -0
  193. tensorbored/plugins/hparams/list_session_groups.py +1040 -0
  194. tensorbored/plugins/hparams/metadata.py +125 -0
  195. tensorbored/plugins/hparams/metrics.py +41 -0
  196. tensorbored/plugins/hparams/plugin_data_pb2.py +69 -0
  197. tensorbored/plugins/hparams/summary.py +205 -0
  198. tensorbored/plugins/hparams/summary_v2.py +597 -0
  199. tensorbored/plugins/image/__init__.py +0 -0
  200. tensorbored/plugins/image/images_plugin.py +232 -0
  201. tensorbored/plugins/image/metadata.py +65 -0
  202. tensorbored/plugins/image/plugin_data_pb2.py +34 -0
  203. tensorbored/plugins/image/summary.py +159 -0
  204. tensorbored/plugins/image/summary_v2.py +130 -0
  205. tensorbored/plugins/mesh/__init__.py +14 -0
  206. tensorbored/plugins/mesh/mesh_plugin.py +292 -0
  207. tensorbored/plugins/mesh/metadata.py +152 -0
  208. tensorbored/plugins/mesh/plugin_data_pb2.py +37 -0
  209. tensorbored/plugins/mesh/summary.py +251 -0
  210. tensorbored/plugins/mesh/summary_v2.py +214 -0
  211. tensorbored/plugins/metrics/__init__.py +0 -0
  212. tensorbored/plugins/metrics/metadata.py +17 -0
  213. tensorbored/plugins/metrics/metrics_plugin.py +623 -0
  214. tensorbored/plugins/pr_curve/__init__.py +0 -0
  215. tensorbored/plugins/pr_curve/metadata.py +75 -0
  216. tensorbored/plugins/pr_curve/plugin_data_pb2.py +34 -0
  217. tensorbored/plugins/pr_curve/pr_curves_plugin.py +241 -0
  218. tensorbored/plugins/pr_curve/summary.py +574 -0
  219. tensorbored/plugins/profile_redirect/__init__.py +0 -0
  220. tensorbored/plugins/profile_redirect/profile_redirect_plugin.py +49 -0
  221. tensorbored/plugins/projector/__init__.py +67 -0
  222. tensorbored/plugins/projector/metadata.py +26 -0
  223. tensorbored/plugins/projector/projector_config_pb2.py +54 -0
  224. tensorbored/plugins/projector/projector_plugin.py +795 -0
  225. tensorbored/plugins/projector/tf_projector_plugin/index.js +32 -0
  226. tensorbored/plugins/projector/tf_projector_plugin/projector_binary.html +524 -0
  227. tensorbored/plugins/projector/tf_projector_plugin/projector_binary.js +15536 -0
  228. tensorbored/plugins/scalar/__init__.py +0 -0
  229. tensorbored/plugins/scalar/metadata.py +60 -0
  230. tensorbored/plugins/scalar/plugin_data_pb2.py +34 -0
  231. tensorbored/plugins/scalar/scalars_plugin.py +181 -0
  232. tensorbored/plugins/scalar/summary.py +109 -0
  233. tensorbored/plugins/scalar/summary_v2.py +124 -0
  234. tensorbored/plugins/text/__init__.py +0 -0
  235. tensorbored/plugins/text/metadata.py +62 -0
  236. tensorbored/plugins/text/plugin_data_pb2.py +34 -0
  237. tensorbored/plugins/text/summary.py +114 -0
  238. tensorbored/plugins/text/summary_v2.py +124 -0
  239. tensorbored/plugins/text/text_plugin.py +288 -0
  240. tensorbored/plugins/wit_redirect/__init__.py +0 -0
  241. tensorbored/plugins/wit_redirect/wit_redirect_plugin.py +49 -0
  242. tensorbored/program.py +910 -0
  243. tensorbored/summary/__init__.py +35 -0
  244. tensorbored/summary/_output.py +124 -0
  245. tensorbored/summary/_tf/__init__.py +14 -0
  246. tensorbored/summary/_tf/summary/__init__.py +178 -0
  247. tensorbored/summary/_writer.py +105 -0
  248. tensorbored/summary/v1.py +51 -0
  249. tensorbored/summary/v2.py +25 -0
  250. tensorbored/summary/writer/__init__.py +13 -0
  251. tensorbored/summary/writer/event_file_writer.py +291 -0
  252. tensorbored/summary/writer/record_writer.py +50 -0
  253. tensorbored/util/__init__.py +0 -0
  254. tensorbored/util/encoder.py +116 -0
  255. tensorbored/util/grpc_util.py +311 -0
  256. tensorbored/util/img_mime_type_detector.py +40 -0
  257. tensorbored/util/io_util.py +20 -0
  258. tensorbored/util/lazy_tensor_creator.py +110 -0
  259. tensorbored/util/op_evaluator.py +104 -0
  260. tensorbored/util/platform_util.py +20 -0
  261. tensorbored/util/tb_logging.py +24 -0
  262. tensorbored/util/tensor_util.py +617 -0
  263. tensorbored/util/timing.py +122 -0
  264. tensorbored/version.py +21 -0
  265. tensorbored/webfiles.zip +0 -0
  266. tensorbored-2.21.0rc1769983804.dist-info/METADATA +49 -0
  267. tensorbored-2.21.0rc1769983804.dist-info/RECORD +271 -0
  268. tensorbored-2.21.0rc1769983804.dist-info/WHEEL +5 -0
  269. tensorbored-2.21.0rc1769983804.dist-info/entry_points.txt +6 -0
  270. tensorbored-2.21.0rc1769983804.dist-info/licenses/LICENSE +739 -0
  271. tensorbored-2.21.0rc1769983804.dist-info/top_level.txt +1 -0
@@ -0,0 +1,757 @@
1
+ # flake8: noqa
2
+ """
3
+ Shim module between Bleach and html5lib. This makes it easier to upgrade the
4
+ html5lib library without having to change a lot of code.
5
+ """
6
+
7
+ import re
8
+ import string
9
+ import warnings
10
+
11
# Ignore html5lib's sanitizer deprecation warning: this package is bleach, the
# intended consumer. The filter has to be installed before importing the
# submodules that import html5lib.
#
# ``module`` is a regular expression matched against the *start* of the fully
# qualified name of the module issuing the warning. The vendored copy lives
# under ``tensorbored._vendor.``, so that prefix must be part of the pattern;
# the unprefixed form is still accepted for backward compatibility. Dots are
# escaped so they only match literal dots.
warnings.filterwarnings(
    "ignore",
    message="html5lib's sanitizer is deprecated",
    category=DeprecationWarning,
    module=r"(?:tensorbored\._vendor\.)?bleach\._vendor\.html5lib",
)
19
+
20
+ from tensorbored._vendor.bleach._vendor.html5lib import ( # noqa: E402 module level import not at top of file
21
+ HTMLParser,
22
+ getTreeWalker,
23
+ )
24
+ from tensorbored._vendor.bleach._vendor.html5lib import (
25
+ constants,
26
+ ) # noqa: E402 module level import not at top of file
27
+ from tensorbored._vendor.bleach._vendor.html5lib.constants import ( # noqa: E402 module level import not at top of file
28
+ namespaces,
29
+ prefixes,
30
+ )
31
+ from tensorbored._vendor.bleach._vendor.html5lib.constants import (
32
+ _ReparseException as ReparseException,
33
+ ) # noqa: E402 module level import not at top of file
34
+ from tensorbored._vendor.bleach._vendor.html5lib.filters.base import (
35
+ Filter,
36
+ ) # noqa: E402 module level import not at top of file
37
+ from tensorbored._vendor.bleach._vendor.html5lib.filters.sanitizer import (
38
+ allowed_protocols,
39
+ allowed_css_properties,
40
+ allowed_svg_properties,
41
+ attr_val_is_uri,
42
+ svg_attr_val_allows_ref,
43
+ svg_allow_local_href,
44
+ ) # noqa: E402 module level import not at top of file
45
+ from tensorbored._vendor.bleach._vendor.html5lib.filters.sanitizer import (
46
+ Filter as SanitizerFilter,
47
+ ) # noqa: E402 module level import not at top of file
48
+ from tensorbored._vendor.bleach._vendor.html5lib._inputstream import (
49
+ HTMLInputStream,
50
+ ) # noqa: E402 module level import not at top of file
51
+ from tensorbored._vendor.bleach._vendor.html5lib.serializer import (
52
+ escape,
53
+ HTMLSerializer,
54
+ ) # noqa: E402 module level import not at top of file
55
+ from tensorbored._vendor.bleach._vendor.html5lib._tokenizer import (
56
+ attributeMap,
57
+ HTMLTokenizer,
58
+ ) # noqa: E402 module level import not at top of file
59
+ from tensorbored._vendor.bleach._vendor.html5lib._trie import (
60
+ Trie,
61
+ ) # noqa: E402 module level import not at top of file
62
+
63
+
64
#: Mapping of entity name to its expanded form, taken from html5lib's constants
ENTITIES = constants.entities

#: Trie built from ENTITIES (html entity string -> character representation)
ENTITIES_TRIE = Trie(ENTITIES)

#: Individual token type constants--these never change
TAG_TOKEN_TYPE_START = constants.tokenTypes["StartTag"]
TAG_TOKEN_TYPE_END = constants.tokenTypes["EndTag"]
TAG_TOKEN_TYPE_CHARACTERS = constants.tokenTypes["Characters"]
TAG_TOKEN_TYPE_PARSEERROR = constants.tokenTypes["ParseError"]

#: Every token type that represents a tag (start, end, or empty)
TAG_TOKEN_TYPES = {
    TAG_TOKEN_TYPE_START,
    TAG_TOKEN_TYPE_END,
    constants.tokenTypes["EmptyTag"],
}
80
+
81
+
82
#: Set of valid HTML tags, from the WHATWG HTML Living Standard element index
#: https://html.spec.whatwg.org/multipage/indices.html#elements-3
#:
#: ``main`` is included: it is a standard element and is also listed in
#: HTML_TAGS_BLOCK_LEVEL, so leaving it out made the two sets disagree.
HTML_TAGS = frozenset(
    (
        # a
        "a", "abbr", "address", "area", "article", "aside", "audio",
        # b
        "b", "base", "bdi", "bdo", "blockquote", "body", "br", "button",
        # c
        "canvas", "caption", "cite", "code", "col", "colgroup",
        # d
        "data", "datalist", "dd", "del", "details", "dfn", "dialog", "div",
        "dl", "dt",
        # e
        "em", "embed",
        # f
        "fieldset", "figcaption", "figure", "footer", "form",
        # h
        "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr",
        "html",
        # i
        "i", "iframe", "img", "input", "ins",
        # k
        "kbd", "keygen",
        # l
        "label", "legend", "li", "link",
        # m
        "main", "map", "mark", "menu", "meta", "meter",
        # n
        "nav", "noscript",
        # o
        "object", "ol", "optgroup", "option", "output",
        # p
        "p", "param", "picture", "pre", "progress",
        # q
        "q",
        # r
        "rp", "rt", "ruby",
        # s
        "s", "samp", "script", "section", "select", "slot", "small", "source",
        "span", "strong", "style", "sub", "summary", "sup",
        # t
        "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead",
        "time", "title", "tr", "track",
        # u
        "u", "ul",
        # v
        "var", "video",
        # w
        "wbr",
    )
)
200
+
201
+
202
#: Block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
#: Taken from mozilla's list on 2019.07.11:
#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
HTML_TAGS_BLOCK_LEVEL = frozenset(
    {
        "address", "article", "aside", "blockquote",
        "details", "dialog", "dd", "div", "dl", "dt",
        "fieldset", "figcaption", "figure", "footer", "form",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "header", "hgroup", "hr",
        "li", "main", "nav", "ol",
        "p", "pre", "section", "table", "ul",
    }
)
242
+
243
+
244
class InputStreamWithMemory:
    """Wraps an HTMLInputStream to remember characters since last <

    This wraps existing HTMLInputStream classes to keep track of the stream
    since the last < which marked an open tag state.

    Reads (``char``/``charsUntil``) are forwarded to the wrapped stream and
    the returned characters are appended to a history buffer; ``unget``
    removes the most recently recorded character again.

    """

    def __init__(self, inner_stream):
        """
        :arg inner_stream: the stream object to wrap; it must provide
            ``reset``, ``position``, ``char``, ``charsUntil``, ``unget``,
            ``errors``, ``charEncoding`` and ``changeEncoding``

        """
        self._inner_stream = inner_stream
        # Expose the wrapped stream's bound methods directly.
        self.reset = self._inner_stream.reset
        self.position = self._inner_stream.position
        # Characters read since the last start_tag() call.
        self._buffer = []

    @property
    def errors(self):
        """The wrapped stream's ``errors`` attribute"""
        return self._inner_stream.errors

    @property
    def charEncoding(self):
        """The wrapped stream's ``charEncoding`` attribute"""
        return self._inner_stream.charEncoding

    @property
    def changeEncoding(self):
        """The wrapped stream's ``changeEncoding`` attribute"""
        return self._inner_stream.changeEncoding

    def char(self):
        """Read one character from the wrapped stream and record it

        :returns: whatever the wrapped stream's ``char()`` returned

        """
        c = self._inner_stream.char()
        # char() can return None if EOF, so ignore that
        if c:
            self._buffer.append(c)
        return c

    def charsUntil(self, characters, opposite=False):
        """Forward to the wrapped stream's ``charsUntil`` and record the result

        :returns: the characters returned by the wrapped stream

        """
        chars = self._inner_stream.charsUntil(characters, opposite=opposite)
        self._buffer.extend(chars)
        return chars

    def unget(self, char):
        """Push ``char`` back onto the wrapped stream and forget it

        A falsy ``char`` (EOF) was never recorded by ``char()``, so ungetting
        it must not remove anything from the history; otherwise a real
        character would be lost from ``get_tag()``.

        """
        if char and self._buffer:
            self._buffer.pop()
        return self._inner_stream.unget(char)

    def get_tag(self):
        """Returns the stream history since last '<'

        Since the buffer starts at the last '<' as seen by tagOpenState(),
        we know that everything from that point to when this method is called
        is the "tag" that is being tokenized.

        """
        return "".join(self._buffer)

    def start_tag(self):
        """Resets stream history to just '<'

        This gets called by tagOpenState() which marks a '<' that denotes an
        open tag. Any time we see that, we reset the buffer.

        """
        self._buffer = ["<"]
305
+
306
+
307
class BleachHTMLTokenizer(HTMLTokenizer):
    """Tokenizer that doesn't consume character entities"""

    def __init__(self, consume_entities=False, **kwargs):
        """
        :arg consume_entities: whether ``consumeEntity`` defers to the
            html5lib implementation (True) or leaves entities untouched (False)
        :arg kwargs: passed through unchanged to ``HTMLTokenizer.__init__``

        """
        super().__init__(**kwargs)

        self.consume_entities = consume_entities

        # Swap the stream for a wrapper that keeps a history of what was read
        self.stream = InputStreamWithMemory(self.stream)

        # Most recently emitted token; used for block element spacing
        self.emitted_last_token = None

    def __iter__(self):
        """Yield tokens from the html5lib tokenizer, patching some of them

        A ParseError token is held back until the following token is seen so
        that the pair can be rewritten together where needed.

        """
        pending_error = None

        for token in super().__iter__():
            if pending_error is None:
                if token["type"] == TAG_TOKEN_TYPE_PARSEERROR:
                    # Hold on to the error; the next token may need fixing.
                    pending_error = token
                else:
                    yield token
                continue

            error_code = pending_error["data"]

            if (
                error_code == "invalid-character-in-attribute-name"
                and token["type"] in TAG_TOKEN_TYPES
                and token.get("data")
            ):
                # token["data"] is an html5lib attributeMap of attr name to
                # attr value.
                #
                # Drop attributes whose name contains ', " or < since those
                # characters are invalid in attribute names.
                token["data"] = attributeMap(
                    (attr_name, attr_value)
                    for attr_name, attr_value in token["data"].items()
                    if not any(bad in attr_name for bad in ('"', "'", "<"))
                )
                pending_error = None
                yield token

            elif (
                error_code == "expected-closing-tag-but-got-char"
                and self.parser.tags is not None
                and token["data"].lower().strip() not in self.parser.tags
            ):
                # This is a malformed tag, a pseudo-tag, or something html5lib
                # wants to turn into a malformed comment that Bleach clean()
                # would drop, so the token stream is adjusted here.
                #
                # Allowed tags never reach this branch--html5lib handles
                # those. Anything else becomes character data, which the
                # sanitizer escapes later.
                token["data"] = self.stream.get_tag()
                token["type"] = TAG_TOKEN_TYPE_CHARACTERS

                pending_error = None
                yield token

            elif token["type"] == TAG_TOKEN_TYPE_PARSEERROR:
                # Another parse error: release the held one and hold the new
                # one instead.
                yield pending_error
                pending_error = token

            else:
                yield pending_error
                yield token
                pending_error = None

        if not pending_error:
            return

        # Error codes for text that ends with < followed by characters
        # (treated as a tag name that abruptly ends), optionally followed by
        # a space and more characters (treated as an attribute that abruptly
        # ends), e.g. "<abc", "<abc abc", "<abc def ". All of these should be
        # treated as character data instead.
        truncated_tag_errors = (
            "eof-in-tag-name",
            "duplicate-attribute",
            "eof-in-attribute-name",
            "eof-in-attribute-value-no-quotes",
            "expected-end-of-tag-but-got-eof",
        )
        if pending_error["data"] in truncated_tag_errors:
            yield {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": self.stream.get_tag()}
        else:
            yield pending_error

    def consumeEntity(self, allowedChar=None, fromAttribute=False):
        """Consume an entity, or put the already-consumed ``&`` back

        When ``consume_entities`` is set this defers to the superclass.
        Otherwise it is effectively a no-op that re-adds the ``&``, either to
        the value of the attribute being built or as a Characters token.

        """
        if self.consume_entities:
            return super().consumeEntity(allowedChar, fromAttribute)

        # By the time this is called an & has been consumed, so restore it
        # instead of converting the entity.
        if not fromAttribute:
            self.tokenQueue.append({"type": TAG_TOKEN_TYPE_CHARACTERS, "data": "&"})
            return

        self.currentToken["data"][-1][1] += "&"

    def tagOpenState(self):
        """Reset the stream history, then run the superclass state"""
        # This state marks a < that is either a StartTag, EndTag, EmptyTag,
        # or ParseError. In every case the history collected so far is
        # discarded via start_tag() on the stream wrapper.
        self.stream.start_tag()
        return super().tagOpenState()

    def emitCurrentToken(self):
        """Emit the current token, replacing disallowed tags with characters"""
        token = self.currentToken
        allowed_tags = self.parser.tags

        disallowed_tag = (
            allowed_tags is not None
            and token["type"] in TAG_TOKEN_TYPES
            and token["name"].lower() not in allowed_tags
        )
        if not disallowed_tag:
            self.emitted_last_token = self.currentToken
            super().emitCurrentToken()
            return

        # A start/end/empty tag that is not in the allowed list is either
        # stripped or escaped; both cases produce a Characters token.
        if not self.parser.strip:
            # Escaping: tokenizing normalizes data, so recover the exact
            # original text from the stream history and use that.
            replacement = self.stream.get_tag()
        elif (
            self.emitted_last_token
            and token["type"] == TAG_TOKEN_TYPE_START
            and token["name"].lower() in HTML_TAGS_BLOCK_LEVEL
        ):
            # Stripping a block level tag: substitute a newline because
            # that's what a browser would parse it as.
            replacement = "\n"
        else:
            # Stripping anything else: substitute an empty string token.
            replacement = ""

        new_token = {"type": TAG_TOKEN_TYPE_CHARACTERS, "data": replacement}

        self.currentToken = self.emitted_last_token = new_token
        self.tokenQueue.append(new_token)
        self.state = self.dataState
490
+
491
+
492
class BleachHTMLParser(HTMLParser):
    """HTMLParser variant that tokenizes with BleachHTMLTokenizer"""

    def __init__(self, tags, strip, consume_entities, **kwargs):
        """
        :arg tags: set of allowed tags--everything else is either stripped or
            escaped; if None, then this doesn't look at tags at all
        :arg strip: whether to strip disallowed tags (True) or escape them (False);
            if tags=None, then this doesn't have any effect
        :arg consume_entities: whether to consume entities (default behavior) or
            leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)
        :arg kwargs: passed through unchanged to ``HTMLParser.__init__``

        """
        if tags is None:
            self.tags = None
        else:
            # Tag names are stored lowercased for the tokenizer's lookups.
            self.tags = frozenset(tag.lower() for tag in tags)
        self.strip = strip
        self.consume_entities = consume_entities
        super().__init__(**kwargs)

    def _parse(
        self, stream, innerHTML=False, container="div", scripting=True, **kwargs
    ):
        """Override of HTMLParser._parse that installs BleachHTMLTokenizer

        ``scripting`` defaults to True so that <noscript> is parsed as though
        JS is enabled, matching the expected context in browsers:
        https://html.spec.whatwg.org/multipage/scripting.html#the-noscript-element

        Extra keyword arguments are forwarded to the tokenizer.

        """
        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = BleachHTMLTokenizer(
            stream=stream,
            consume_entities=self.consume_entities,
            parser=self,
            **kwargs,
        )
        self.reset()

        try:
            self.mainLoop()
        except ReparseException:
            # Start over from a clean state and run the loop once more.
            self.reset()
            self.mainLoop()
534
+
535
+
536
def convert_entity(value):
    """Convert an entity (minus the & and ; part) into what it represents

    This handles numeric, hex, and text entities.

    Numeric entities are validated strictly: only ASCII digits (or ASCII hex
    digits after ``#x`` / ``#X``) are accepted. Anything else--signs,
    underscores, whitespace, a second ``0x`` prefix, non-ASCII digits--is
    treated as "not an entity" rather than being handed to ``int()``, which
    would either accept it or raise.

    :arg value: the string (minus the ``&`` and ``;`` part) to convert

    :returns: unicode character or None if it's an ambiguous ampersand that
        doesn't match a character entity (including empty or malformed input)

    """
    if not value:
        # Nothing between "&" and ";"--cannot name any entity.
        return None

    if value[0] != "#":
        return ENTITIES.get(value, None)

    # value[1:2] is "" for a bare "#", so this never raises IndexError.
    if value[1:2] in ("x", "X"):
        # hex-encoded code point
        digits, allowed, base = value[2:], "0123456789abcdefABCDEF", 16
    else:
        # decimal code point
        digits, allowed, base = value[1:], "0123456789", 10

    if not digits or any(c not in allowed for c in digits):
        return None

    # The largest code point, 0x10FFFF, has 7 decimal / 6 hex digits. Longer
    # values (ignoring leading zeros) are out of range, so reject them
    # without converting; this also avoids Python's limit on the length of
    # decimal strings accepted by int().
    significant = digits.lstrip("0")
    if not significant or len(significant) > 7:
        # All zeros (code point 0) or far too large.
        return None

    code_point = int(significant, base)
    if 0 < code_point < 0x110000:
        return chr(code_point)
    return None
568
+
569
+
570
def convert_entities(text):
    """Converts all found entities in the text

    Only entities that start with ``&``, end with ``;`` and resolve to a
    character are replaced; everything else (including ambiguous ampersands)
    is copied through unchanged.

    :arg text: the text to convert entities in

    :returns: unicode text with converted entities

    """
    if "&" not in text:
        # Fast path: nothing that could be an entity.
        return text

    new_text = []
    for part in next_possible_entity(text):
        if not part:
            continue

        if part.startswith("&"):
            entity = match_entity(part)
            if entity is not None:
                converted = convert_entity(entity)

                # If it's not an ambiguous ampersand, then replace with the
                # unicode character. Otherwise, we leave the entity in.
                if converted is not None:
                    new_text.append(converted)
                    # Skip the entity plus 2--one for & at the beginning
                    # and one for ; at the end.
                    remainder = part[len(entity) + 2 :]
                    if remainder:
                        new_text.append(remainder)
                    continue

        new_text.append(part)

    return "".join(new_text)
603
+
604
+
605
def match_entity(stream):
    """Returns first entity in stream or None if no entity exists

    Note: For Bleach purposes, entities must start with a "&" and end with a
    ";". This ignores ambiguous character entities that have no ";" at the end.

    Numeric entities are only checked for shape (``#`` / ``#x`` followed by
    digits of the right kind and a ``;``); whether the number is a usable
    code point is not checked here.

    :arg stream: the character stream (a string)

    :returns: the entity string without "&" or ";" if it's a valid character
        entity; ``None`` otherwise

    :raises ValueError: if the stream is empty or does not begin with "&"

    """
    if not stream or stream[0] != "&":
        raise ValueError('Stream should begin with "&"')

    end_characters = "<&=;" + string.whitespace
    length = len(stream)

    # Scan by index instead of copying the stream into a list and popping
    # from the front; position 0 is the "&" we just checked.
    pos = 1

    # Handle number entities
    if pos < length and stream[pos] == "#":
        pos += 1

        if pos < length and stream[pos] in ("x", "X"):
            allowed = "0123456789abcdefABCDEF"
            pos += 1
        else:
            allowed = "0123456789"

        while pos < length and stream[pos] not in end_characters:
            if stream[pos] not in allowed:
                # A stray character inside the digits means this is not a
                # numeric entity. It must not be skipped over: otherwise
                # "&#12z;" would be reported as "#12" and callers would
                # drop the "z".
                return None
            pos += 1

        if pos < length and stream[pos] == ";":
            return stream[1:pos]
        return None

    # Handle character entities
    start = pos
    while pos < length and stream[pos] not in end_characters:
        pos += 1
        if not ENTITIES_TRIE.has_keys_with_prefix(stream[start:pos]):
            # If it's not a prefix, then it's not an entity and we're
            # out
            return None

    if start < pos < length and stream[pos] == ";":
        return stream[start:pos]

    return None
663
+
664
+
665
# Splits on "&" while keeping each "&" as its own list item (capturing group).
AMP_SPLIT_RE = re.compile("(&)")


def next_possible_entity(text):
    """Break a text into chunks that each might begin with an entity

    :arg text: the text to look at

    :returns: generator of substrings; the first is whatever precedes the
        first "&" (possibly empty) and every later one starts with an "&"

    """
    pieces = AMP_SPLIT_RE.split(text)

    # Item 0 is the text before the first "&". After that the list
    # alternates "&", <text>, "&", <text>, ... so the text following each
    # "&" sits at the even indexes from 2 onwards.
    yield pieces[0]
    for tail in pieces[2::2]:
        yield "&" + tail
682
+
683
+
684
class BleachHTMLSerializer(HTMLSerializer):
    """HTMLSerializer that undoes & -> &amp; in attributes and sets
    escape_rcdata to True
    """

    # per the HTMLSerializer.__init__ docstring:
    #
    # Whether to escape characters that need to be
    # escaped within normal elements within rcdata elements such as
    # style.
    #
    escape_rcdata = True

    def escape_base_amp(self, stoken):
        """Escapes just bare & in HTML attribute values

        :arg stoken: a serialized attribute value

        :returns: generator of string pieces in which unambiguous character
            entities are kept as they are and every other ``&`` is written
            as ``&amp;``

        """
        # First, undo escaping of &. We need to do this because html5lib's
        # HTMLSerializer expected the tokenizer to consume all the character
        # entities and convert them to their respective characters, but the
        # BleachHTMLTokenizer doesn't do that. For example, this fixes
        # &amp;entity; back to &entity; .
        unescaped = stoken.replace("&amp;", "&")

        # However, we do want all bare & that are not marking character
        # entities to be changed to &amp;, so let's do that carefully here.
        for piece in next_possible_entity(unescaped):
            if not piece:
                continue

            name = match_entity(piece) if piece.startswith("&") else None

            # Only leave entities in that are not ambiguous. If they're
            # ambiguous (or there is no entity at all), then we escape the
            # ampersand.
            if name is None or convert_entity(name) is None:
                yield piece.replace("&", "&amp;")
                continue

            yield f"&{name};"

            # Length of the entity plus 2--one for & at the beginning
            # and one for ; at the end
            tail = piece[len(name) + 2 :]
            if tail:
                yield tail

    def serialize(self, treewalker, encoding=None):
        """Wrap HTMLSerializer.serialize and convert & to &amp; in attribute values

        Note that this converts & to &amp; in attribute values where the & isn't
        already part of an unambiguous character entity.

        :arg treewalker: passed through to ``HTMLSerializer.serialize``
        :arg encoding: passed through to ``HTMLSerializer.serialize``

        :returns: generator of serialized tokens

        """
        inside_tag = False
        expecting_value = False

        for chunk in super().serialize(treewalker, encoding):
            if not inside_tag:
                # Outside of a tag everything passes through untouched; a
                # chunk starting with "<" means we have entered a tag.
                if chunk.startswith("<"):
                    inside_tag = True
                yield chunk
                continue

            if chunk == ">":
                inside_tag = False

            elif expecting_value:
                # The first chunk after "=" that is not a double quote is
                # handled as the attribute value. A double quote itself is
                # passed through and we keep waiting for the value.
                if chunk != '"':
                    yield from self.escape_base_amp(chunk)
                    expecting_value = False
                    continue

            elif chunk == "=":
                expecting_value = True

            yield chunk