tensorbored 2.21.0rc1769983804__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271)
  1. tensorbored/__init__.py +112 -0
  2. tensorbored/_vendor/__init__.py +0 -0
  3. tensorbored/_vendor/bleach/__init__.py +125 -0
  4. tensorbored/_vendor/bleach/_vendor/__init__.py +0 -0
  5. tensorbored/_vendor/bleach/_vendor/html5lib/__init__.py +35 -0
  6. tensorbored/_vendor/bleach/_vendor/html5lib/_ihatexml.py +289 -0
  7. tensorbored/_vendor/bleach/_vendor/html5lib/_inputstream.py +918 -0
  8. tensorbored/_vendor/bleach/_vendor/html5lib/_tokenizer.py +1735 -0
  9. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/__init__.py +5 -0
  10. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/_base.py +40 -0
  11. tensorbored/_vendor/bleach/_vendor/html5lib/_trie/py.py +67 -0
  12. tensorbored/_vendor/bleach/_vendor/html5lib/_utils.py +159 -0
  13. tensorbored/_vendor/bleach/_vendor/html5lib/constants.py +2946 -0
  14. tensorbored/_vendor/bleach/_vendor/html5lib/filters/__init__.py +0 -0
  15. tensorbored/_vendor/bleach/_vendor/html5lib/filters/alphabeticalattributes.py +29 -0
  16. tensorbored/_vendor/bleach/_vendor/html5lib/filters/base.py +12 -0
  17. tensorbored/_vendor/bleach/_vendor/html5lib/filters/inject_meta_charset.py +73 -0
  18. tensorbored/_vendor/bleach/_vendor/html5lib/filters/lint.py +93 -0
  19. tensorbored/_vendor/bleach/_vendor/html5lib/filters/optionaltags.py +207 -0
  20. tensorbored/_vendor/bleach/_vendor/html5lib/filters/sanitizer.py +916 -0
  21. tensorbored/_vendor/bleach/_vendor/html5lib/filters/whitespace.py +38 -0
  22. tensorbored/_vendor/bleach/_vendor/html5lib/html5parser.py +2795 -0
  23. tensorbored/_vendor/bleach/_vendor/html5lib/serializer.py +409 -0
  24. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/__init__.py +30 -0
  25. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/genshi.py +54 -0
  26. tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/sax.py +50 -0
  27. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/__init__.py +88 -0
  28. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/base.py +417 -0
  29. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/dom.py +239 -0
  30. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree.py +343 -0
  31. tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree_lxml.py +392 -0
  32. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/__init__.py +154 -0
  33. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/base.py +252 -0
  34. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/dom.py +43 -0
  35. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree.py +131 -0
  36. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree_lxml.py +215 -0
  37. tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/genshi.py +69 -0
  38. tensorbored/_vendor/bleach/_vendor/parse.py +1078 -0
  39. tensorbored/_vendor/bleach/callbacks.py +32 -0
  40. tensorbored/_vendor/bleach/html5lib_shim.py +757 -0
  41. tensorbored/_vendor/bleach/linkifier.py +633 -0
  42. tensorbored/_vendor/bleach/parse_shim.py +1 -0
  43. tensorbored/_vendor/bleach/sanitizer.py +638 -0
  44. tensorbored/_vendor/bleach/six_shim.py +19 -0
  45. tensorbored/_vendor/webencodings/__init__.py +342 -0
  46. tensorbored/_vendor/webencodings/labels.py +231 -0
  47. tensorbored/_vendor/webencodings/mklabels.py +59 -0
  48. tensorbored/_vendor/webencodings/x_user_defined.py +325 -0
  49. tensorbored/assets.py +36 -0
  50. tensorbored/auth.py +102 -0
  51. tensorbored/backend/__init__.py +0 -0
  52. tensorbored/backend/application.py +604 -0
  53. tensorbored/backend/auth_context_middleware.py +38 -0
  54. tensorbored/backend/client_feature_flags.py +113 -0
  55. tensorbored/backend/empty_path_redirect.py +46 -0
  56. tensorbored/backend/event_processing/__init__.py +0 -0
  57. tensorbored/backend/event_processing/data_ingester.py +276 -0
  58. tensorbored/backend/event_processing/data_provider.py +535 -0
  59. tensorbored/backend/event_processing/directory_loader.py +142 -0
  60. tensorbored/backend/event_processing/directory_watcher.py +272 -0
  61. tensorbored/backend/event_processing/event_accumulator.py +950 -0
  62. tensorbored/backend/event_processing/event_file_inspector.py +463 -0
  63. tensorbored/backend/event_processing/event_file_loader.py +292 -0
  64. tensorbored/backend/event_processing/event_multiplexer.py +521 -0
  65. tensorbored/backend/event_processing/event_util.py +68 -0
  66. tensorbored/backend/event_processing/io_wrapper.py +223 -0
  67. tensorbored/backend/event_processing/plugin_asset_util.py +104 -0
  68. tensorbored/backend/event_processing/plugin_event_accumulator.py +721 -0
  69. tensorbored/backend/event_processing/plugin_event_multiplexer.py +522 -0
  70. tensorbored/backend/event_processing/reservoir.py +266 -0
  71. tensorbored/backend/event_processing/tag_types.py +29 -0
  72. tensorbored/backend/experiment_id.py +71 -0
  73. tensorbored/backend/experimental_plugin.py +51 -0
  74. tensorbored/backend/http_util.py +263 -0
  75. tensorbored/backend/json_util.py +70 -0
  76. tensorbored/backend/path_prefix.py +67 -0
  77. tensorbored/backend/process_graph.py +74 -0
  78. tensorbored/backend/security_validator.py +202 -0
  79. tensorbored/compat/__init__.py +69 -0
  80. tensorbored/compat/proto/__init__.py +0 -0
  81. tensorbored/compat/proto/allocation_description_pb2.py +35 -0
  82. tensorbored/compat/proto/api_def_pb2.py +82 -0
  83. tensorbored/compat/proto/attr_value_pb2.py +80 -0
  84. tensorbored/compat/proto/cluster_pb2.py +58 -0
  85. tensorbored/compat/proto/config_pb2.py +271 -0
  86. tensorbored/compat/proto/coordination_config_pb2.py +45 -0
  87. tensorbored/compat/proto/cost_graph_pb2.py +87 -0
  88. tensorbored/compat/proto/cpp_shape_inference_pb2.py +70 -0
  89. tensorbored/compat/proto/debug_pb2.py +65 -0
  90. tensorbored/compat/proto/event_pb2.py +149 -0
  91. tensorbored/compat/proto/full_type_pb2.py +74 -0
  92. tensorbored/compat/proto/function_pb2.py +157 -0
  93. tensorbored/compat/proto/graph_debug_info_pb2.py +111 -0
  94. tensorbored/compat/proto/graph_pb2.py +41 -0
  95. tensorbored/compat/proto/histogram_pb2.py +39 -0
  96. tensorbored/compat/proto/meta_graph_pb2.py +254 -0
  97. tensorbored/compat/proto/node_def_pb2.py +61 -0
  98. tensorbored/compat/proto/op_def_pb2.py +81 -0
  99. tensorbored/compat/proto/resource_handle_pb2.py +48 -0
  100. tensorbored/compat/proto/rewriter_config_pb2.py +93 -0
  101. tensorbored/compat/proto/rpc_options_pb2.py +35 -0
  102. tensorbored/compat/proto/saved_object_graph_pb2.py +193 -0
  103. tensorbored/compat/proto/saver_pb2.py +38 -0
  104. tensorbored/compat/proto/step_stats_pb2.py +116 -0
  105. tensorbored/compat/proto/struct_pb2.py +144 -0
  106. tensorbored/compat/proto/summary_pb2.py +111 -0
  107. tensorbored/compat/proto/tensor_description_pb2.py +38 -0
  108. tensorbored/compat/proto/tensor_pb2.py +68 -0
  109. tensorbored/compat/proto/tensor_shape_pb2.py +46 -0
  110. tensorbored/compat/proto/tfprof_log_pb2.py +307 -0
  111. tensorbored/compat/proto/trackable_object_graph_pb2.py +90 -0
  112. tensorbored/compat/proto/types_pb2.py +105 -0
  113. tensorbored/compat/proto/variable_pb2.py +62 -0
  114. tensorbored/compat/proto/verifier_config_pb2.py +38 -0
  115. tensorbored/compat/proto/versions_pb2.py +35 -0
  116. tensorbored/compat/tensorflow_stub/__init__.py +38 -0
  117. tensorbored/compat/tensorflow_stub/app.py +124 -0
  118. tensorbored/compat/tensorflow_stub/compat/__init__.py +131 -0
  119. tensorbored/compat/tensorflow_stub/compat/v1/__init__.py +20 -0
  120. tensorbored/compat/tensorflow_stub/dtypes.py +692 -0
  121. tensorbored/compat/tensorflow_stub/error_codes.py +169 -0
  122. tensorbored/compat/tensorflow_stub/errors.py +507 -0
  123. tensorbored/compat/tensorflow_stub/flags.py +124 -0
  124. tensorbored/compat/tensorflow_stub/io/__init__.py +17 -0
  125. tensorbored/compat/tensorflow_stub/io/gfile.py +1011 -0
  126. tensorbored/compat/tensorflow_stub/pywrap_tensorflow.py +285 -0
  127. tensorbored/compat/tensorflow_stub/tensor_shape.py +1035 -0
  128. tensorbored/context.py +129 -0
  129. tensorbored/data/__init__.py +0 -0
  130. tensorbored/data/grpc_provider.py +365 -0
  131. tensorbored/data/ingester.py +46 -0
  132. tensorbored/data/proto/__init__.py +0 -0
  133. tensorbored/data/proto/data_provider_pb2.py +517 -0
  134. tensorbored/data/proto/data_provider_pb2_grpc.py +374 -0
  135. tensorbored/data/provider.py +1365 -0
  136. tensorbored/data/server_ingester.py +301 -0
  137. tensorbored/data_compat.py +159 -0
  138. tensorbored/dataclass_compat.py +224 -0
  139. tensorbored/default.py +124 -0
  140. tensorbored/errors.py +130 -0
  141. tensorbored/lazy.py +99 -0
  142. tensorbored/main.py +48 -0
  143. tensorbored/main_lib.py +62 -0
  144. tensorbored/manager.py +487 -0
  145. tensorbored/notebook.py +441 -0
  146. tensorbored/plugin_util.py +266 -0
  147. tensorbored/plugins/__init__.py +0 -0
  148. tensorbored/plugins/audio/__init__.py +0 -0
  149. tensorbored/plugins/audio/audio_plugin.py +229 -0
  150. tensorbored/plugins/audio/metadata.py +69 -0
  151. tensorbored/plugins/audio/plugin_data_pb2.py +37 -0
  152. tensorbored/plugins/audio/summary.py +230 -0
  153. tensorbored/plugins/audio/summary_v2.py +124 -0
  154. tensorbored/plugins/base_plugin.py +367 -0
  155. tensorbored/plugins/core/__init__.py +0 -0
  156. tensorbored/plugins/core/core_plugin.py +981 -0
  157. tensorbored/plugins/custom_scalar/__init__.py +0 -0
  158. tensorbored/plugins/custom_scalar/custom_scalars_plugin.py +320 -0
  159. tensorbored/plugins/custom_scalar/layout_pb2.py +85 -0
  160. tensorbored/plugins/custom_scalar/metadata.py +35 -0
  161. tensorbored/plugins/custom_scalar/summary.py +79 -0
  162. tensorbored/plugins/debugger_v2/__init__.py +0 -0
  163. tensorbored/plugins/debugger_v2/debug_data_multiplexer.py +631 -0
  164. tensorbored/plugins/debugger_v2/debug_data_provider.py +634 -0
  165. tensorbored/plugins/debugger_v2/debugger_v2_plugin.py +504 -0
  166. tensorbored/plugins/distribution/__init__.py +0 -0
  167. tensorbored/plugins/distribution/compressor.py +158 -0
  168. tensorbored/plugins/distribution/distributions_plugin.py +116 -0
  169. tensorbored/plugins/distribution/metadata.py +19 -0
  170. tensorbored/plugins/graph/__init__.py +0 -0
  171. tensorbored/plugins/graph/graph_util.py +129 -0
  172. tensorbored/plugins/graph/graphs_plugin.py +336 -0
  173. tensorbored/plugins/graph/keras_util.py +328 -0
  174. tensorbored/plugins/graph/metadata.py +42 -0
  175. tensorbored/plugins/histogram/__init__.py +0 -0
  176. tensorbored/plugins/histogram/histograms_plugin.py +144 -0
  177. tensorbored/plugins/histogram/metadata.py +63 -0
  178. tensorbored/plugins/histogram/plugin_data_pb2.py +34 -0
  179. tensorbored/plugins/histogram/summary.py +234 -0
  180. tensorbored/plugins/histogram/summary_v2.py +292 -0
  181. tensorbored/plugins/hparams/__init__.py +14 -0
  182. tensorbored/plugins/hparams/_keras.py +93 -0
  183. tensorbored/plugins/hparams/api.py +130 -0
  184. tensorbored/plugins/hparams/api_pb2.py +208 -0
  185. tensorbored/plugins/hparams/backend_context.py +606 -0
  186. tensorbored/plugins/hparams/download_data.py +158 -0
  187. tensorbored/plugins/hparams/error.py +26 -0
  188. tensorbored/plugins/hparams/get_experiment.py +71 -0
  189. tensorbored/plugins/hparams/hparams_plugin.py +206 -0
  190. tensorbored/plugins/hparams/hparams_util_pb2.py +69 -0
  191. tensorbored/plugins/hparams/json_format_compat.py +38 -0
  192. tensorbored/plugins/hparams/list_metric_evals.py +57 -0
  193. tensorbored/plugins/hparams/list_session_groups.py +1040 -0
  194. tensorbored/plugins/hparams/metadata.py +125 -0
  195. tensorbored/plugins/hparams/metrics.py +41 -0
  196. tensorbored/plugins/hparams/plugin_data_pb2.py +69 -0
  197. tensorbored/plugins/hparams/summary.py +205 -0
  198. tensorbored/plugins/hparams/summary_v2.py +597 -0
  199. tensorbored/plugins/image/__init__.py +0 -0
  200. tensorbored/plugins/image/images_plugin.py +232 -0
  201. tensorbored/plugins/image/metadata.py +65 -0
  202. tensorbored/plugins/image/plugin_data_pb2.py +34 -0
  203. tensorbored/plugins/image/summary.py +159 -0
  204. tensorbored/plugins/image/summary_v2.py +130 -0
  205. tensorbored/plugins/mesh/__init__.py +14 -0
  206. tensorbored/plugins/mesh/mesh_plugin.py +292 -0
  207. tensorbored/plugins/mesh/metadata.py +152 -0
  208. tensorbored/plugins/mesh/plugin_data_pb2.py +37 -0
  209. tensorbored/plugins/mesh/summary.py +251 -0
  210. tensorbored/plugins/mesh/summary_v2.py +214 -0
  211. tensorbored/plugins/metrics/__init__.py +0 -0
  212. tensorbored/plugins/metrics/metadata.py +17 -0
  213. tensorbored/plugins/metrics/metrics_plugin.py +623 -0
  214. tensorbored/plugins/pr_curve/__init__.py +0 -0
  215. tensorbored/plugins/pr_curve/metadata.py +75 -0
  216. tensorbored/plugins/pr_curve/plugin_data_pb2.py +34 -0
  217. tensorbored/plugins/pr_curve/pr_curves_plugin.py +241 -0
  218. tensorbored/plugins/pr_curve/summary.py +574 -0
  219. tensorbored/plugins/profile_redirect/__init__.py +0 -0
  220. tensorbored/plugins/profile_redirect/profile_redirect_plugin.py +49 -0
  221. tensorbored/plugins/projector/__init__.py +67 -0
  222. tensorbored/plugins/projector/metadata.py +26 -0
  223. tensorbored/plugins/projector/projector_config_pb2.py +54 -0
  224. tensorbored/plugins/projector/projector_plugin.py +795 -0
  225. tensorbored/plugins/projector/tf_projector_plugin/index.js +32 -0
  226. tensorbored/plugins/projector/tf_projector_plugin/projector_binary.html +524 -0
  227. tensorbored/plugins/projector/tf_projector_plugin/projector_binary.js +15536 -0
  228. tensorbored/plugins/scalar/__init__.py +0 -0
  229. tensorbored/plugins/scalar/metadata.py +60 -0
  230. tensorbored/plugins/scalar/plugin_data_pb2.py +34 -0
  231. tensorbored/plugins/scalar/scalars_plugin.py +181 -0
  232. tensorbored/plugins/scalar/summary.py +109 -0
  233. tensorbored/plugins/scalar/summary_v2.py +124 -0
  234. tensorbored/plugins/text/__init__.py +0 -0
  235. tensorbored/plugins/text/metadata.py +62 -0
  236. tensorbored/plugins/text/plugin_data_pb2.py +34 -0
  237. tensorbored/plugins/text/summary.py +114 -0
  238. tensorbored/plugins/text/summary_v2.py +124 -0
  239. tensorbored/plugins/text/text_plugin.py +288 -0
  240. tensorbored/plugins/wit_redirect/__init__.py +0 -0
  241. tensorbored/plugins/wit_redirect/wit_redirect_plugin.py +49 -0
  242. tensorbored/program.py +910 -0
  243. tensorbored/summary/__init__.py +35 -0
  244. tensorbored/summary/_output.py +124 -0
  245. tensorbored/summary/_tf/__init__.py +14 -0
  246. tensorbored/summary/_tf/summary/__init__.py +178 -0
  247. tensorbored/summary/_writer.py +105 -0
  248. tensorbored/summary/v1.py +51 -0
  249. tensorbored/summary/v2.py +25 -0
  250. tensorbored/summary/writer/__init__.py +13 -0
  251. tensorbored/summary/writer/event_file_writer.py +291 -0
  252. tensorbored/summary/writer/record_writer.py +50 -0
  253. tensorbored/util/__init__.py +0 -0
  254. tensorbored/util/encoder.py +116 -0
  255. tensorbored/util/grpc_util.py +311 -0
  256. tensorbored/util/img_mime_type_detector.py +40 -0
  257. tensorbored/util/io_util.py +20 -0
  258. tensorbored/util/lazy_tensor_creator.py +110 -0
  259. tensorbored/util/op_evaluator.py +104 -0
  260. tensorbored/util/platform_util.py +20 -0
  261. tensorbored/util/tb_logging.py +24 -0
  262. tensorbored/util/tensor_util.py +617 -0
  263. tensorbored/util/timing.py +122 -0
  264. tensorbored/version.py +21 -0
  265. tensorbored/webfiles.zip +0 -0
  266. tensorbored-2.21.0rc1769983804.dist-info/METADATA +49 -0
  267. tensorbored-2.21.0rc1769983804.dist-info/RECORD +271 -0
  268. tensorbored-2.21.0rc1769983804.dist-info/WHEEL +5 -0
  269. tensorbored-2.21.0rc1769983804.dist-info/entry_points.txt +6 -0
  270. tensorbored-2.21.0rc1769983804.dist-info/licenses/LICENSE +739 -0
  271. tensorbored-2.21.0rc1769983804.dist-info/top_level.txt +1 -0
@@ -0,0 +1,633 @@
1
+ import re
2
+
3
+ from urllib.parse import quote
4
+
5
+ from tensorbored._vendor.bleach import callbacks as linkify_callbacks
6
+ from tensorbored._vendor.bleach import html5lib_shim
7
+
8
+
9
#: List of default callbacks run when adjusting link attributes; the single
#: default is ``nofollow`` (presumably adds rel="nofollow" -- see
#: tensorbored._vendor.bleach.callbacks).
DEFAULT_CALLBACKS = [linkify_callbacks.nofollow]
11
+
12
+
13
# Raw space-separated list of recognized top-level domains.
_TLD_TEXT = """ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az
ba bb bd be bf bg bh bi biz bj bm bn bo br bs bt bv bw by bz ca cat
cc cd cf cg ch ci ck cl cm cn co com coop cr cu cv cx cy cz de dj dk
dm do dz ec edu ee eg er es et eu fi fj fk fm fo fr ga gb gd ge gf gg
gh gi gl gm gn gov gp gq gr gs gt gu gw gy hk hm hn hr ht hu id ie il
im in info int io iq ir is it je jm jo jobs jp ke kg kh ki km kn kp
kr kw ky kz la lb lc li lk lr ls lt lu lv ly ma mc md me mg mh mil mk
ml mm mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz na name nc ne
net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn post
pr pro ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl
sm sn so sr ss st su sv sx sy sz tc td tel tf tg th tj tk tl tm tn to
tp tr travel tt tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws
xn xxx ye yt yu za zm zw"""

# Reversed so that, in the generated regex alternation, ".com" is tried
# before its prefix ".co" and doesn't get shadowed by it.
TLDS = list(reversed(_TLD_TEXT.split()))
29
+
30
+
31
def build_url_re(tlds=TLDS, protocols=html5lib_shim.allowed_protocols):
    """Build and return the compiled url regex used by the linkifier.

    To use a different set of tlds or allowed protocols, build a new
    pattern and stomp on the existing ``url_re``::

        from bleach import linkifier

        my_url_re = linkifier.build_url_re(my_tlds_list, my_protocols)

        linker = LinkifyFilter(url_re=my_url_re)

    """
    # Sort both alternations so the generated pattern is deterministic.
    protocol_group = "|".join(sorted(protocols))
    tld_group = "|".join(sorted(tlds))
    # Doubled braces below survive str.format() as literal regex braces.
    template = r"""\(* # Match any opening parentheses.
    \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)? # http://
    ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b # xx.yy.tld(:##)?
    (?:[/?][^\s\{{\}}\|\\\^`<>"]*)?
        # /path/zz (excluding "unsafe" chars from RFC 3986,
        # except for # and ~, which happen in practice)
    """
    return re.compile(
        template.format(protocol_group, tld_group),
        re.IGNORECASE | re.VERBOSE | re.UNICODE,
    )
56
+
57
+
58
# Default url-matching regex, built from the module-level TLD list and the
# protocols allowed by the html5lib shim.
URL_RE = build_url_re()


# Matches a leading scheme (e.g. "http://", "mailto:") at the start of a
# candidate url; used to decide whether "http://" must be prepended.
PROTO_RE = re.compile(r"^[\w-]+:/{0,3}", re.IGNORECASE)
62
+
63
+
64
def build_email_re(tlds=TLDS):
    """Build and return the compiled email regex used by the linkifier.

    To use a different set of tlds, build a new pattern and stomp on the
    existing ``email_re``::

        from bleach import linkifier

        my_email_re = linkifier.build_email_re(my_tlds_list)

        linker = LinkifyFilter(email_re=my_url_re)

    """
    # Doubled braces below survive str.format() as literal regex braces.
    template = r"""(?<!//)
    (([-!#$%&'*+/=?^_`{{}}|~0-9A-Z]+
    (\.[-!#$%&'*+/=?^_`{{}}|~0-9A-Z]+)* # dot-atom
    |^"([\001-\010\013\014\016-\037!#-\[\]-\177]
    |\\[\001-\011\013\014\016-\177])*" # quoted-string
    )@(?:[A-Z0-9](?:[A-Z0-9-]{{0,61}}[A-Z0-9])?\.)+(?:{0})) # domain
    """
    return re.compile(
        template.format("|".join(tlds)),
        re.IGNORECASE | re.MULTILINE | re.VERBOSE,
    )
89
+
90
+
91
# Default email-matching regex, built from the module-level TLD list.
EMAIL_RE = build_email_re()
92
+
93
+
94
class Linker:
    """Convert URL-like strings in an HTML fragment to links

    Converts strings that look like URLs, domain names, and email addresses
    inside text (which may be an HTML fragment) into links, while
    preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify works best-effort and tries to recover from malformed text.

    """

    def __init__(
        self,
        callbacks=DEFAULT_CALLBACKS,
        skip_tags=None,
        parse_email=False,
        url_re=URL_RE,
        email_re=EMAIL_RE,
        recognized_tags=html5lib_shim.HTML_TAGS,
    ):
        """Creates a Linker instance

        :arg list callbacks: list of callbacks to run when adjusting tag
            attributes; defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

        :arg set skip_tags: tags whose contents should not be linkified;
            e.g. ``{'pre'}`` skips the contents of ``pre`` tags; ``None``
            means no tags are skipped

        :arg bool parse_email: whether or not to linkify email addresses

        :arg url_re: url matching regex

        :arg email_re: email matching regex

        :arg set recognized_tags: the set of tags that linkify knows about;
            everything else gets escaped

        :returns: linkified text as unicode

        """
        self.callbacks = callbacks
        self.skip_tags = skip_tags
        self.parse_email = parse_email
        self.url_re = url_re
        self.email_re = email_re

        # Parser/tokenizer that accepts every recognized tag and escapes
        # anything outside that set.
        self.parser = html5lib_shim.BleachHTMLParser(
            tags=frozenset(recognized_tags),
            strip=False,
            consume_entities=False,
            namespaceHTMLElements=False,
        )
        self.walker = html5lib_shim.getTreeWalker("etree")
        self.serializer = html5lib_shim.BleachHTMLSerializer(
            quote_attr_values="always",
            omit_optional_tags=False,
            # Leave entities alone: no escaping, resolving, or expanding.
            resolve_entities=False,
            # linkify does not sanitize
            sanitize=False,
            # linkify preserves attr order
            alphabetical_attributes=False,
        )

    def linkify(self, text):
        """Linkify specified text

        :arg str text: the text to add links to

        :returns: linkified text as unicode

        :raises TypeError: if ``text`` is not a text type

        """
        if not isinstance(text, str):
            raise TypeError("argument must be of text type")

        if not text:
            return ""

        # Parse, run the linkify filter over the token stream, re-serialize.
        fragment = self.parser.parseFragment(text)
        link_stream = LinkifyFilter(
            source=self.walker(fragment),
            callbacks=self.callbacks,
            skip_tags=self.skip_tags,
            parse_email=self.parse_email,
            url_re=self.url_re,
            email_re=self.email_re,
        )
        return self.serializer.render(link_stream)
193
+
194
+
195
+ class LinkifyFilter(html5lib_shim.Filter):
196
+ """html5lib filter that linkifies text
197
+
198
+ This will do the following:
199
+
200
+ * convert email addresses into links
201
+ * convert urls into links
202
+ * edit existing links by running them through callbacks--the default is to
203
+ add a ``rel="nofollow"``
204
+
205
+ This filter can be used anywhere html5lib filters can be used.
206
+
207
+ """
208
+
209
+ def __init__(
210
+ self,
211
+ source,
212
+ callbacks=DEFAULT_CALLBACKS,
213
+ skip_tags=None,
214
+ parse_email=False,
215
+ url_re=URL_RE,
216
+ email_re=EMAIL_RE,
217
+ ):
218
+ """Creates a LinkifyFilter instance
219
+
220
+ :arg source: stream as an html5lib TreeWalker
221
+
222
+ :arg list callbacks: list of callbacks to run when adjusting tag attributes;
223
+ defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
224
+
225
+ :arg set skip_tags: set of tags that you don't want to linkify the
226
+ contents of; for example, you could set this to ``{'pre'}`` to skip
227
+ linkifying contents of ``pre`` tags
228
+
229
+ :arg bool parse_email: whether or not to linkify email addresses
230
+
231
+ :arg url_re: url matching regex
232
+
233
+ :arg email_re: email matching regex
234
+
235
+ """
236
+ super().__init__(source)
237
+
238
+ self.callbacks = callbacks or []
239
+ self.skip_tags = skip_tags or {}
240
+ self.parse_email = parse_email
241
+
242
+ self.url_re = url_re
243
+ self.email_re = email_re
244
+
245
+ def apply_callbacks(self, attrs, is_new):
246
+ """Given an attrs dict and an is_new bool, runs through callbacks
247
+
248
+ Callbacks can return an adjusted attrs dict or ``None``. In the case of
249
+ ``None``, we stop going through callbacks and return that and the link
250
+ gets dropped.
251
+
252
+ :arg dict attrs: map of ``(namespace, name)`` -> ``value``
253
+
254
+ :arg bool is_new: whether or not this link was added by linkify
255
+
256
+ :returns: adjusted attrs dict or ``None``
257
+
258
+ """
259
+ for cb in self.callbacks:
260
+ attrs = cb(attrs, is_new)
261
+ if attrs is None:
262
+ return None
263
+ return attrs
264
+
265
+ def extract_character_data(self, token_list):
266
+ """Extracts and squashes character sequences in a token stream"""
267
+ # FIXME(willkg): This is a terrible idea. What it does is drop all the
268
+ # tags from the token list and merge the Characters and SpaceCharacters
269
+ # tokens into a single text.
270
+ #
271
+ # So something like this::
272
+ #
273
+ # "<span>" "<b>" "some text" "</b>" "</span>"
274
+ #
275
+ # gets converted to "some text".
276
+ #
277
+ # This gets used to figure out the ``_text`` fauxttribute value for
278
+ # linkify callables.
279
+ #
280
+ # I'm not really sure how else to support that ``_text`` fauxttribute and
281
+ # maintain some modicum of backwards compatibility with previous versions
282
+ # of Bleach.
283
+
284
+ out = []
285
+ for token in token_list:
286
+ token_type = token["type"]
287
+ if token_type in ["Characters", "SpaceCharacters"]:
288
+ out.append(token["data"])
289
+
290
+ return "".join(out)
291
+
292
+ def handle_email_addresses(self, src_iter):
293
+ """Handle email addresses in character tokens"""
294
+ for token in src_iter:
295
+ if token["type"] == "Characters":
296
+ text = token["data"]
297
+ new_tokens = []
298
+ end = 0
299
+
300
+ # For each email address we find in the text
301
+ for match in self.email_re.finditer(text):
302
+ if match.start() > end:
303
+ new_tokens.append(
304
+ {"type": "Characters", "data": text[end : match.start()]}
305
+ )
306
+
307
+ # URL-encode the "local-part" according to RFC6068
308
+ parts = match.group(0).split("@")
309
+ parts[0] = quote(parts[0])
310
+ address = "@".join(parts)
311
+
312
+ # Run attributes through the callbacks to see what we
313
+ # should do with this match
314
+ attrs = {
315
+ (None, "href"): "mailto:%s" % address,
316
+ "_text": match.group(0),
317
+ }
318
+ attrs = self.apply_callbacks(attrs, True)
319
+
320
+ if attrs is None:
321
+ # Just add the text--but not as a link
322
+ new_tokens.append(
323
+ {"type": "Characters", "data": match.group(0)}
324
+ )
325
+
326
+ else:
327
+ # Add an "a" tag for the new link
328
+ _text = attrs.pop("_text", "")
329
+ new_tokens.extend(
330
+ [
331
+ {"type": "StartTag", "name": "a", "data": attrs},
332
+ {"type": "Characters", "data": str(_text)},
333
+ {"type": "EndTag", "name": "a"},
334
+ ]
335
+ )
336
+ end = match.end()
337
+
338
+ if new_tokens:
339
+ # Yield the adjusted set of tokens and then continue
340
+ # through the loop
341
+ if end < len(text):
342
+ new_tokens.append({"type": "Characters", "data": text[end:]})
343
+
344
+ yield from new_tokens
345
+
346
+ continue
347
+
348
+ yield token
349
+
350
+ def strip_non_url_bits(self, fragment):
351
+ """Strips non-url bits from the url
352
+
353
+ This accounts for over-eager matching by the regex.
354
+
355
+ """
356
+ prefix = suffix = ""
357
+
358
+ while fragment:
359
+ # Try removing ( from the beginning and, if it's balanced, from the
360
+ # end, too
361
+ if fragment.startswith("("):
362
+ prefix = prefix + "("
363
+ fragment = fragment[1:]
364
+
365
+ if fragment.endswith(")"):
366
+ suffix = ")" + suffix
367
+ fragment = fragment[:-1]
368
+ continue
369
+
370
+ # Now try extraneous things from the end. For example, sometimes we
371
+ # pick up ) at the end of a url, but the url is in a parenthesized
372
+ # phrase like:
373
+ #
374
+ # "i looked at the site (at http://example.com)"
375
+
376
+ if fragment.endswith(")") and "(" not in fragment:
377
+ fragment = fragment[:-1]
378
+ suffix = ")" + suffix
379
+ continue
380
+
381
+ # Handle commas
382
+ if fragment.endswith(","):
383
+ fragment = fragment[:-1]
384
+ suffix = "," + suffix
385
+ continue
386
+
387
+ # Handle periods
388
+ if fragment.endswith("."):
389
+ fragment = fragment[:-1]
390
+ suffix = "." + suffix
391
+ continue
392
+
393
+ # Nothing matched, so we're done
394
+ break
395
+
396
+ return fragment, prefix, suffix
397
+
398
+ def handle_links(self, src_iter):
399
+ """Handle links in character tokens"""
400
+ in_a = False # happens, if parse_email=True and if a mail was found
401
+ for token in src_iter:
402
+ if in_a:
403
+ if token["type"] == "EndTag" and token["name"] == "a":
404
+ in_a = False
405
+ yield token
406
+ continue
407
+ elif token["type"] == "StartTag" and token["name"] == "a":
408
+ in_a = True
409
+ yield token
410
+ continue
411
+ if token["type"] == "Characters":
412
+ text = token["data"]
413
+ new_tokens = []
414
+ end = 0
415
+
416
+ for match in self.url_re.finditer(text):
417
+ if match.start() > end:
418
+ new_tokens.append(
419
+ {"type": "Characters", "data": text[end : match.start()]}
420
+ )
421
+
422
+ url = match.group(0)
423
+ prefix = suffix = ""
424
+
425
+ # Sometimes we pick up too much in the url match, so look for
426
+ # bits we should drop and remove them from the match
427
+ url, prefix, suffix = self.strip_non_url_bits(url)
428
+
429
+ # If there's no protocol, add one
430
+ if PROTO_RE.search(url):
431
+ href = url
432
+ else:
433
+ href = "http://%s" % url
434
+
435
+ attrs = {(None, "href"): href, "_text": url}
436
+ attrs = self.apply_callbacks(attrs, True)
437
+
438
+ if attrs is None:
439
+ # Just add the text
440
+ new_tokens.append(
441
+ {"type": "Characters", "data": prefix + url + suffix}
442
+ )
443
+
444
+ else:
445
+ # Add the "a" tag!
446
+ if prefix:
447
+ new_tokens.append({"type": "Characters", "data": prefix})
448
+
449
+ _text = attrs.pop("_text", "")
450
+ new_tokens.extend(
451
+ [
452
+ {"type": "StartTag", "name": "a", "data": attrs},
453
+ {"type": "Characters", "data": str(_text)},
454
+ {"type": "EndTag", "name": "a"},
455
+ ]
456
+ )
457
+
458
+ if suffix:
459
+ new_tokens.append({"type": "Characters", "data": suffix})
460
+
461
+ end = match.end()
462
+
463
+ if new_tokens:
464
+ # Yield the adjusted set of tokens and then continue
465
+ # through the loop
466
+ if end < len(text):
467
+ new_tokens.append({"type": "Characters", "data": text[end:]})
468
+
469
+ yield from new_tokens
470
+
471
+ continue
472
+
473
+ yield token
474
+
475
+ def handle_a_tag(self, token_buffer):
476
+ """Handle the "a" tag
477
+
478
+ This could adjust the link or drop it altogether depending on what the
479
+ callbacks return.
480
+
481
+ This yields the new set of tokens.
482
+
483
+ """
484
+ a_token = token_buffer[0]
485
+ if a_token["data"]:
486
+ attrs = a_token["data"]
487
+ else:
488
+ attrs = {}
489
+ text = self.extract_character_data(token_buffer)
490
+ attrs["_text"] = text
491
+
492
+ attrs = self.apply_callbacks(attrs, False)
493
+
494
+ if attrs is None:
495
+ # We're dropping the "a" tag and everything else and replacing
496
+ # it with character data. So emit that token.
497
+ yield {"type": "Characters", "data": text}
498
+
499
+ else:
500
+ new_text = attrs.pop("_text", "")
501
+ a_token["data"] = attrs
502
+
503
+ if text == new_text:
504
+ # The callbacks didn't change the text, so we yield the new "a"
505
+ # token, then whatever else was there, then the end "a" token
506
+ yield a_token
507
+ yield from token_buffer[1:]
508
+
509
+ else:
510
+ # If the callbacks changed the text, then we're going to drop
511
+ # all the tokens between the start and end "a" tags and replace
512
+ # it with the new text
513
+ yield a_token
514
+ yield {"type": "Characters", "data": str(new_text)}
515
+ yield token_buffer[-1]
516
+
517
+ def extract_entities(self, token):
518
+ """Handles Characters tokens with entities
519
+
520
+ Our overridden tokenizer doesn't do anything with entities. However,
521
+ that means that the serializer will convert all ``&`` in Characters
522
+ tokens to ``&amp;``.
523
+
524
+ Since we don't want that, we extract entities here and convert them to
525
+ Entity tokens so the serializer will let them be.
526
+
527
+ :arg token: the Characters token to work on
528
+
529
+ :returns: generator of tokens
530
+
531
+ """
532
+ data = token.get("data", "")
533
+
534
+ # If there isn't a & in the data, we can return now
535
+ if "&" not in data:
536
+ yield token
537
+ return
538
+
539
+ new_tokens = []
540
+
541
+ # For each possible entity that starts with a "&", we try to extract an
542
+ # actual entity and re-tokenize accordingly
543
+ for part in html5lib_shim.next_possible_entity(data):
544
+ if not part:
545
+ continue
546
+
547
+ if part.startswith("&"):
548
+ entity = html5lib_shim.match_entity(part)
549
+ if entity is not None:
550
+ if entity == "amp":
551
+ # LinkifyFilter can't match urls across token boundaries
552
+ # which is problematic with &amp; since that shows up in
553
+ # querystrings all the time. This special-cases &amp;
554
+ # and converts it to a & and sticks it in as a
555
+ # Characters token. It'll get merged with surrounding
556
+ # tokens in the BleachSanitizerfilter.__iter__ and
557
+ # escaped in the serializer.
558
+ new_tokens.append({"type": "Characters", "data": "&"})
559
+ else:
560
+ new_tokens.append({"type": "Entity", "name": entity})
561
+
562
+ # Length of the entity plus 2--one for & at the beginning
563
+ # and one for ; at the end
564
+ remainder = part[len(entity) + 2 :]
565
+ if remainder:
566
+ new_tokens.append({"type": "Characters", "data": remainder})
567
+ continue
568
+
569
+ new_tokens.append({"type": "Characters", "data": part})
570
+
571
+ yield from new_tokens
572
+
573
+ def __iter__(self):
574
+ in_a = False
575
+ in_skip_tag = None
576
+
577
+ token_buffer = []
578
+
579
+ for token in super().__iter__():
580
+ if in_a:
581
+ # Handle the case where we're in an "a" tag--we want to buffer tokens
582
+ # until we hit an end "a" tag.
583
+ if token["type"] == "EndTag" and token["name"] == "a":
584
+ # Add the end tag to the token buffer and then handle them
585
+ # and yield anything returned
586
+ token_buffer.append(token)
587
+ yield from self.handle_a_tag(token_buffer)
588
+
589
+ # Clear "a" related state and continue since we've yielded all
590
+ # the tokens we're going to yield
591
+ in_a = False
592
+ token_buffer = []
593
+ else:
594
+ token_buffer.extend(list(self.extract_entities(token)))
595
+ continue
596
+
597
+ if token["type"] in ["StartTag", "EmptyTag"]:
598
+ if token["name"] in self.skip_tags:
599
+ # Skip tags start a "special mode" where we don't linkify
600
+ # anything until the end tag.
601
+ in_skip_tag = token["name"]
602
+
603
+ elif token["name"] == "a":
604
+ # The "a" tag is special--we switch to a slurp mode and
605
+ # slurp all the tokens until the end "a" tag and then
606
+ # figure out what to do with them there.
607
+ in_a = True
608
+ token_buffer.append(token)
609
+
610
+ # We buffer the start tag, so we don't want to yield it,
611
+ # yet
612
+ continue
613
+
614
+ elif in_skip_tag and self.skip_tags:
615
+ # NOTE(willkg): We put this clause here since in_a and
616
+ # switching in and out of in_a takes precedence.
617
+ if token["type"] == "EndTag" and token["name"] == in_skip_tag:
618
+ in_skip_tag = None
619
+
620
+ elif not in_a and not in_skip_tag and token["type"] == "Characters":
621
+ new_stream = iter([token])
622
+ if self.parse_email:
623
+ new_stream = self.handle_email_addresses(new_stream)
624
+
625
+ new_stream = self.handle_links(new_stream)
626
+
627
+ for new_token in new_stream:
628
+ yield from self.extract_entities(new_token)
629
+
630
+ # We've already yielded this token, so continue
631
+ continue
632
+
633
+ yield token
@@ -0,0 +1 @@
1
+ from tensorbored._vendor.bleach._vendor.parse import urlparse # noqa