tensorbored 2.21.0rc1769983804__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorbored/__init__.py +112 -0
- tensorbored/_vendor/__init__.py +0 -0
- tensorbored/_vendor/bleach/__init__.py +125 -0
- tensorbored/_vendor/bleach/_vendor/__init__.py +0 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/__init__.py +35 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_ihatexml.py +289 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_inputstream.py +918 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_tokenizer.py +1735 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/__init__.py +5 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/_base.py +40 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_trie/py.py +67 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/_utils.py +159 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/constants.py +2946 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/__init__.py +0 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/alphabeticalattributes.py +29 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/base.py +12 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/inject_meta_charset.py +73 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/lint.py +93 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/optionaltags.py +207 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/sanitizer.py +916 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/filters/whitespace.py +38 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/html5parser.py +2795 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/serializer.py +409 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/__init__.py +30 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/genshi.py +54 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treeadapters/sax.py +50 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/__init__.py +88 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/base.py +417 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/dom.py +239 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree.py +343 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treebuilders/etree_lxml.py +392 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/__init__.py +154 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/base.py +252 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/dom.py +43 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree.py +131 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/etree_lxml.py +215 -0
- tensorbored/_vendor/bleach/_vendor/html5lib/treewalkers/genshi.py +69 -0
- tensorbored/_vendor/bleach/_vendor/parse.py +1078 -0
- tensorbored/_vendor/bleach/callbacks.py +32 -0
- tensorbored/_vendor/bleach/html5lib_shim.py +757 -0
- tensorbored/_vendor/bleach/linkifier.py +633 -0
- tensorbored/_vendor/bleach/parse_shim.py +1 -0
- tensorbored/_vendor/bleach/sanitizer.py +638 -0
- tensorbored/_vendor/bleach/six_shim.py +19 -0
- tensorbored/_vendor/webencodings/__init__.py +342 -0
- tensorbored/_vendor/webencodings/labels.py +231 -0
- tensorbored/_vendor/webencodings/mklabels.py +59 -0
- tensorbored/_vendor/webencodings/x_user_defined.py +325 -0
- tensorbored/assets.py +36 -0
- tensorbored/auth.py +102 -0
- tensorbored/backend/__init__.py +0 -0
- tensorbored/backend/application.py +604 -0
- tensorbored/backend/auth_context_middleware.py +38 -0
- tensorbored/backend/client_feature_flags.py +113 -0
- tensorbored/backend/empty_path_redirect.py +46 -0
- tensorbored/backend/event_processing/__init__.py +0 -0
- tensorbored/backend/event_processing/data_ingester.py +276 -0
- tensorbored/backend/event_processing/data_provider.py +535 -0
- tensorbored/backend/event_processing/directory_loader.py +142 -0
- tensorbored/backend/event_processing/directory_watcher.py +272 -0
- tensorbored/backend/event_processing/event_accumulator.py +950 -0
- tensorbored/backend/event_processing/event_file_inspector.py +463 -0
- tensorbored/backend/event_processing/event_file_loader.py +292 -0
- tensorbored/backend/event_processing/event_multiplexer.py +521 -0
- tensorbored/backend/event_processing/event_util.py +68 -0
- tensorbored/backend/event_processing/io_wrapper.py +223 -0
- tensorbored/backend/event_processing/plugin_asset_util.py +104 -0
- tensorbored/backend/event_processing/plugin_event_accumulator.py +721 -0
- tensorbored/backend/event_processing/plugin_event_multiplexer.py +522 -0
- tensorbored/backend/event_processing/reservoir.py +266 -0
- tensorbored/backend/event_processing/tag_types.py +29 -0
- tensorbored/backend/experiment_id.py +71 -0
- tensorbored/backend/experimental_plugin.py +51 -0
- tensorbored/backend/http_util.py +263 -0
- tensorbored/backend/json_util.py +70 -0
- tensorbored/backend/path_prefix.py +67 -0
- tensorbored/backend/process_graph.py +74 -0
- tensorbored/backend/security_validator.py +202 -0
- tensorbored/compat/__init__.py +69 -0
- tensorbored/compat/proto/__init__.py +0 -0
- tensorbored/compat/proto/allocation_description_pb2.py +35 -0
- tensorbored/compat/proto/api_def_pb2.py +82 -0
- tensorbored/compat/proto/attr_value_pb2.py +80 -0
- tensorbored/compat/proto/cluster_pb2.py +58 -0
- tensorbored/compat/proto/config_pb2.py +271 -0
- tensorbored/compat/proto/coordination_config_pb2.py +45 -0
- tensorbored/compat/proto/cost_graph_pb2.py +87 -0
- tensorbored/compat/proto/cpp_shape_inference_pb2.py +70 -0
- tensorbored/compat/proto/debug_pb2.py +65 -0
- tensorbored/compat/proto/event_pb2.py +149 -0
- tensorbored/compat/proto/full_type_pb2.py +74 -0
- tensorbored/compat/proto/function_pb2.py +157 -0
- tensorbored/compat/proto/graph_debug_info_pb2.py +111 -0
- tensorbored/compat/proto/graph_pb2.py +41 -0
- tensorbored/compat/proto/histogram_pb2.py +39 -0
- tensorbored/compat/proto/meta_graph_pb2.py +254 -0
- tensorbored/compat/proto/node_def_pb2.py +61 -0
- tensorbored/compat/proto/op_def_pb2.py +81 -0
- tensorbored/compat/proto/resource_handle_pb2.py +48 -0
- tensorbored/compat/proto/rewriter_config_pb2.py +93 -0
- tensorbored/compat/proto/rpc_options_pb2.py +35 -0
- tensorbored/compat/proto/saved_object_graph_pb2.py +193 -0
- tensorbored/compat/proto/saver_pb2.py +38 -0
- tensorbored/compat/proto/step_stats_pb2.py +116 -0
- tensorbored/compat/proto/struct_pb2.py +144 -0
- tensorbored/compat/proto/summary_pb2.py +111 -0
- tensorbored/compat/proto/tensor_description_pb2.py +38 -0
- tensorbored/compat/proto/tensor_pb2.py +68 -0
- tensorbored/compat/proto/tensor_shape_pb2.py +46 -0
- tensorbored/compat/proto/tfprof_log_pb2.py +307 -0
- tensorbored/compat/proto/trackable_object_graph_pb2.py +90 -0
- tensorbored/compat/proto/types_pb2.py +105 -0
- tensorbored/compat/proto/variable_pb2.py +62 -0
- tensorbored/compat/proto/verifier_config_pb2.py +38 -0
- tensorbored/compat/proto/versions_pb2.py +35 -0
- tensorbored/compat/tensorflow_stub/__init__.py +38 -0
- tensorbored/compat/tensorflow_stub/app.py +124 -0
- tensorbored/compat/tensorflow_stub/compat/__init__.py +131 -0
- tensorbored/compat/tensorflow_stub/compat/v1/__init__.py +20 -0
- tensorbored/compat/tensorflow_stub/dtypes.py +692 -0
- tensorbored/compat/tensorflow_stub/error_codes.py +169 -0
- tensorbored/compat/tensorflow_stub/errors.py +507 -0
- tensorbored/compat/tensorflow_stub/flags.py +124 -0
- tensorbored/compat/tensorflow_stub/io/__init__.py +17 -0
- tensorbored/compat/tensorflow_stub/io/gfile.py +1011 -0
- tensorbored/compat/tensorflow_stub/pywrap_tensorflow.py +285 -0
- tensorbored/compat/tensorflow_stub/tensor_shape.py +1035 -0
- tensorbored/context.py +129 -0
- tensorbored/data/__init__.py +0 -0
- tensorbored/data/grpc_provider.py +365 -0
- tensorbored/data/ingester.py +46 -0
- tensorbored/data/proto/__init__.py +0 -0
- tensorbored/data/proto/data_provider_pb2.py +517 -0
- tensorbored/data/proto/data_provider_pb2_grpc.py +374 -0
- tensorbored/data/provider.py +1365 -0
- tensorbored/data/server_ingester.py +301 -0
- tensorbored/data_compat.py +159 -0
- tensorbored/dataclass_compat.py +224 -0
- tensorbored/default.py +124 -0
- tensorbored/errors.py +130 -0
- tensorbored/lazy.py +99 -0
- tensorbored/main.py +48 -0
- tensorbored/main_lib.py +62 -0
- tensorbored/manager.py +487 -0
- tensorbored/notebook.py +441 -0
- tensorbored/plugin_util.py +266 -0
- tensorbored/plugins/__init__.py +0 -0
- tensorbored/plugins/audio/__init__.py +0 -0
- tensorbored/plugins/audio/audio_plugin.py +229 -0
- tensorbored/plugins/audio/metadata.py +69 -0
- tensorbored/plugins/audio/plugin_data_pb2.py +37 -0
- tensorbored/plugins/audio/summary.py +230 -0
- tensorbored/plugins/audio/summary_v2.py +124 -0
- tensorbored/plugins/base_plugin.py +367 -0
- tensorbored/plugins/core/__init__.py +0 -0
- tensorbored/plugins/core/core_plugin.py +981 -0
- tensorbored/plugins/custom_scalar/__init__.py +0 -0
- tensorbored/plugins/custom_scalar/custom_scalars_plugin.py +320 -0
- tensorbored/plugins/custom_scalar/layout_pb2.py +85 -0
- tensorbored/plugins/custom_scalar/metadata.py +35 -0
- tensorbored/plugins/custom_scalar/summary.py +79 -0
- tensorbored/plugins/debugger_v2/__init__.py +0 -0
- tensorbored/plugins/debugger_v2/debug_data_multiplexer.py +631 -0
- tensorbored/plugins/debugger_v2/debug_data_provider.py +634 -0
- tensorbored/plugins/debugger_v2/debugger_v2_plugin.py +504 -0
- tensorbored/plugins/distribution/__init__.py +0 -0
- tensorbored/plugins/distribution/compressor.py +158 -0
- tensorbored/plugins/distribution/distributions_plugin.py +116 -0
- tensorbored/plugins/distribution/metadata.py +19 -0
- tensorbored/plugins/graph/__init__.py +0 -0
- tensorbored/plugins/graph/graph_util.py +129 -0
- tensorbored/plugins/graph/graphs_plugin.py +336 -0
- tensorbored/plugins/graph/keras_util.py +328 -0
- tensorbored/plugins/graph/metadata.py +42 -0
- tensorbored/plugins/histogram/__init__.py +0 -0
- tensorbored/plugins/histogram/histograms_plugin.py +144 -0
- tensorbored/plugins/histogram/metadata.py +63 -0
- tensorbored/plugins/histogram/plugin_data_pb2.py +34 -0
- tensorbored/plugins/histogram/summary.py +234 -0
- tensorbored/plugins/histogram/summary_v2.py +292 -0
- tensorbored/plugins/hparams/__init__.py +14 -0
- tensorbored/plugins/hparams/_keras.py +93 -0
- tensorbored/plugins/hparams/api.py +130 -0
- tensorbored/plugins/hparams/api_pb2.py +208 -0
- tensorbored/plugins/hparams/backend_context.py +606 -0
- tensorbored/plugins/hparams/download_data.py +158 -0
- tensorbored/plugins/hparams/error.py +26 -0
- tensorbored/plugins/hparams/get_experiment.py +71 -0
- tensorbored/plugins/hparams/hparams_plugin.py +206 -0
- tensorbored/plugins/hparams/hparams_util_pb2.py +69 -0
- tensorbored/plugins/hparams/json_format_compat.py +38 -0
- tensorbored/plugins/hparams/list_metric_evals.py +57 -0
- tensorbored/plugins/hparams/list_session_groups.py +1040 -0
- tensorbored/plugins/hparams/metadata.py +125 -0
- tensorbored/plugins/hparams/metrics.py +41 -0
- tensorbored/plugins/hparams/plugin_data_pb2.py +69 -0
- tensorbored/plugins/hparams/summary.py +205 -0
- tensorbored/plugins/hparams/summary_v2.py +597 -0
- tensorbored/plugins/image/__init__.py +0 -0
- tensorbored/plugins/image/images_plugin.py +232 -0
- tensorbored/plugins/image/metadata.py +65 -0
- tensorbored/plugins/image/plugin_data_pb2.py +34 -0
- tensorbored/plugins/image/summary.py +159 -0
- tensorbored/plugins/image/summary_v2.py +130 -0
- tensorbored/plugins/mesh/__init__.py +14 -0
- tensorbored/plugins/mesh/mesh_plugin.py +292 -0
- tensorbored/plugins/mesh/metadata.py +152 -0
- tensorbored/plugins/mesh/plugin_data_pb2.py +37 -0
- tensorbored/plugins/mesh/summary.py +251 -0
- tensorbored/plugins/mesh/summary_v2.py +214 -0
- tensorbored/plugins/metrics/__init__.py +0 -0
- tensorbored/plugins/metrics/metadata.py +17 -0
- tensorbored/plugins/metrics/metrics_plugin.py +623 -0
- tensorbored/plugins/pr_curve/__init__.py +0 -0
- tensorbored/plugins/pr_curve/metadata.py +75 -0
- tensorbored/plugins/pr_curve/plugin_data_pb2.py +34 -0
- tensorbored/plugins/pr_curve/pr_curves_plugin.py +241 -0
- tensorbored/plugins/pr_curve/summary.py +574 -0
- tensorbored/plugins/profile_redirect/__init__.py +0 -0
- tensorbored/plugins/profile_redirect/profile_redirect_plugin.py +49 -0
- tensorbored/plugins/projector/__init__.py +67 -0
- tensorbored/plugins/projector/metadata.py +26 -0
- tensorbored/plugins/projector/projector_config_pb2.py +54 -0
- tensorbored/plugins/projector/projector_plugin.py +795 -0
- tensorbored/plugins/projector/tf_projector_plugin/index.js +32 -0
- tensorbored/plugins/projector/tf_projector_plugin/projector_binary.html +524 -0
- tensorbored/plugins/projector/tf_projector_plugin/projector_binary.js +15536 -0
- tensorbored/plugins/scalar/__init__.py +0 -0
- tensorbored/plugins/scalar/metadata.py +60 -0
- tensorbored/plugins/scalar/plugin_data_pb2.py +34 -0
- tensorbored/plugins/scalar/scalars_plugin.py +181 -0
- tensorbored/plugins/scalar/summary.py +109 -0
- tensorbored/plugins/scalar/summary_v2.py +124 -0
- tensorbored/plugins/text/__init__.py +0 -0
- tensorbored/plugins/text/metadata.py +62 -0
- tensorbored/plugins/text/plugin_data_pb2.py +34 -0
- tensorbored/plugins/text/summary.py +114 -0
- tensorbored/plugins/text/summary_v2.py +124 -0
- tensorbored/plugins/text/text_plugin.py +288 -0
- tensorbored/plugins/wit_redirect/__init__.py +0 -0
- tensorbored/plugins/wit_redirect/wit_redirect_plugin.py +49 -0
- tensorbored/program.py +910 -0
- tensorbored/summary/__init__.py +35 -0
- tensorbored/summary/_output.py +124 -0
- tensorbored/summary/_tf/__init__.py +14 -0
- tensorbored/summary/_tf/summary/__init__.py +178 -0
- tensorbored/summary/_writer.py +105 -0
- tensorbored/summary/v1.py +51 -0
- tensorbored/summary/v2.py +25 -0
- tensorbored/summary/writer/__init__.py +13 -0
- tensorbored/summary/writer/event_file_writer.py +291 -0
- tensorbored/summary/writer/record_writer.py +50 -0
- tensorbored/util/__init__.py +0 -0
- tensorbored/util/encoder.py +116 -0
- tensorbored/util/grpc_util.py +311 -0
- tensorbored/util/img_mime_type_detector.py +40 -0
- tensorbored/util/io_util.py +20 -0
- tensorbored/util/lazy_tensor_creator.py +110 -0
- tensorbored/util/op_evaluator.py +104 -0
- tensorbored/util/platform_util.py +20 -0
- tensorbored/util/tb_logging.py +24 -0
- tensorbored/util/tensor_util.py +617 -0
- tensorbored/util/timing.py +122 -0
- tensorbored/version.py +21 -0
- tensorbored/webfiles.zip +0 -0
- tensorbored-2.21.0rc1769983804.dist-info/METADATA +49 -0
- tensorbored-2.21.0rc1769983804.dist-info/RECORD +271 -0
- tensorbored-2.21.0rc1769983804.dist-info/WHEEL +5 -0
- tensorbored-2.21.0rc1769983804.dist-info/entry_points.txt +6 -0
- tensorbored-2.21.0rc1769983804.dist-info/licenses/LICENSE +739 -0
- tensorbored-2.21.0rc1769983804.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
from __future__ import absolute_import, division, unicode_literals
|
|
2
|
+
|
|
3
|
+
from xml.dom import Node
|
|
4
|
+
from ..constants import namespaces, voidElements, spaceCharacters
|
|
5
|
+
|
|
6
|
+
# Names exported by ``from <this module> import *``.
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
           "TreeWalker", "NonRecursiveTreeWalker"]

# Node-type tags used by the tree walkers.  All but UNKNOWN reuse the
# corresponding constants of ``xml.dom.Node``.
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
ELEMENT = Node.ELEMENT_NODE
COMMENT = Node.COMMENT_NODE
ENTITY = Node.ENTITY_NODE
# Tag for any node type that is none of the above.
UNKNOWN = "<#UNKNOWN#>"

# Rebind the imported collection as one string so that it can be handed
# directly to ``str.lstrip()`` / ``str.rstrip()`` in ``TreeWalker.text``.
spaceCharacters = "".join(spaceCharacters)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TreeWalker(object):
    """Walks a tree yielding tokens

    Tokens are dicts that all have a ``type`` field specifying the type of the
    token.

    This base class only provides the token constructors; subclasses must
    implement ``__iter__``.

    """
    def __init__(self, tree):
        """Creates a TreeWalker

        :arg tree: the tree to walk

        """
        self.tree = tree

    def __iter__(self):
        """Yields the tokens of ``self.tree``; must be overridden"""
        raise NotImplementedError

    def error(self, msg):
        """Generates an error token with the given message

        :arg msg: the error message

        :returns: SerializeError token

        """
        return {"type": "SerializeError", "data": msg}

    def emptyTag(self, namespace, name, attrs, hasChildren=False):
        """Generates an EmptyTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :arg attrs: the attributes of the element as a dict

        :arg hasChildren: whether or not to yield a SerializationError because
            this tag shouldn't have children

        :returns: generator yielding the EmptyTag token, followed by a
            SerializeError token when ``hasChildren`` is true

        """
        yield {"type": "EmptyTag", "name": name,
               "namespace": namespace,
               "data": attrs}
        if hasChildren:
            yield self.error("Void element has children")

    def startTag(self, namespace, name, attrs):
        """Generates a StartTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :arg attrs: the attributes of the element as a dict

        :returns: StartTag token

        """
        return {"type": "StartTag",
                "name": name,
                "namespace": namespace,
                "data": attrs}

    def endTag(self, namespace, name):
        """Generates an EndTag token

        :arg namespace: the namespace of the token--can be ``None``

        :arg name: the name of the element

        :returns: EndTag token

        """
        return {"type": "EndTag",
                "name": name,
                "namespace": namespace}

    def text(self, data):
        """Generates SpaceCharacters and Characters tokens

        Depending on what's in the data, this generates one or more
        ``SpaceCharacters`` and ``Characters`` tokens.

        For example:

            >>> from html5lib.treewalkers.base import TreeWalker
            >>> # Give it an empty tree just so it instantiates
            >>> walker = TreeWalker([])
            >>> list(walker.text(''))
            []
            >>> list(walker.text('  '))
            [{'type': 'SpaceCharacters', 'data': '  '}]
            >>> list(walker.text(' abc '))  # doctest: +NORMALIZE_WHITESPACE
            [{'type': 'SpaceCharacters', 'data': ' '},
            {'type': 'Characters', 'data': 'abc'},
            {'type': 'SpaceCharacters', 'data': ' '}]

        :arg data: the text data

        :returns: generator of zero or more ``SpaceCharacters`` and
            ``Characters`` tokens

        """
        # Split off the leading run of space characters first ...
        middle = data.lstrip(spaceCharacters)
        left = data[:len(data) - len(middle)]
        if left:
            yield {"type": "SpaceCharacters", "data": left}
        # ... then the trailing run; what remains in between is the
        # Characters payload (interior whitespace is kept as-is).
        data = middle
        middle = data.rstrip(spaceCharacters)
        right = data[len(middle):]
        if middle:
            yield {"type": "Characters", "data": middle}
        if right:
            yield {"type": "SpaceCharacters", "data": right}

    def comment(self, data):
        """Generates a Comment token

        :arg data: the comment

        :returns: Comment token

        """
        return {"type": "Comment", "data": data}

    def doctype(self, name, publicId=None, systemId=None):
        """Generates a Doctype token

        :arg name: stored under the token's ``name`` key

        :arg publicId: stored under the token's ``publicId`` key

        :arg systemId: stored under the token's ``systemId`` key

        :returns: the Doctype token

        """
        return {"type": "Doctype",
                "name": name,
                "publicId": publicId,
                "systemId": systemId}

    def entity(self, name):
        """Generates an Entity token

        :arg name: the entity name

        :returns: an Entity token

        """
        return {"type": "Entity", "name": name}

    def unknown(self, nodeType):
        """Handles unknown node types

        :arg nodeType: the unrecognised node type; it need not be a string
            (for example an integer DOM node type)

        :returns: SerializeError token naming the node type

        """
        # Format instead of concatenating: ``str + int`` raises TypeError,
        # which would turn an unknown node into a crash rather than a token.
        return self.error("Unknown node type: %s" % (nodeType,))
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class NonRecursiveTreeWalker(TreeWalker):
    """Tree walker that traverses the tree with loops instead of recursion

    Subclasses provide the four navigation hooks below; ``__iter__`` combines
    them to emit the token stream.

    """
    def getNodeDetails(self, node):
        """Describes ``node`` as a tuple; must be overridden

        ``__iter__`` treats item 0 as the node type and interprets the rest
        by type: DOCTYPE and TEXT details are passed on as positional
        arguments, ELEMENT details are
        ``(namespace, name, attributes, hasChildren)``, and COMMENT, ENTITY
        and unknown types use only the first detail.

        """
        raise NotImplementedError

    def getFirstChild(self, node):
        """Returns the first child of ``node`` or ``None``; must be overridden"""
        raise NotImplementedError

    def getNextSibling(self, node):
        """Returns the next sibling of ``node`` or ``None``; must be overridden"""
        raise NotImplementedError

    def getParentNode(self, node):
        """Returns the parent of ``node``; must be overridden"""
        raise NotImplementedError

    def __iter__(self):
        """Yields the tokens for ``self.tree`` in document order"""
        node = self.tree
        while node is not None:
            info = self.getNodeDetails(node)
            kind, rest = info[0], info[1:]
            descend = False

            if kind == DOCTYPE:
                yield self.doctype(*rest)

            elif kind == TEXT:
                for tok in self.text(*rest):
                    yield tok

            elif kind == ELEMENT:
                namespace, name, attributes, descend = rest
                in_html = not namespace or namespace == namespaces["html"]
                if in_html and name in voidElements:
                    for tok in self.emptyTag(namespace, name, attributes,
                                             descend):
                        yield tok
                    # Void elements are never descended into.
                    descend = False
                else:
                    yield self.startTag(namespace, name, attributes)

            elif kind == COMMENT:
                yield self.comment(rest[0])

            elif kind == ENTITY:
                yield self.entity(rest[0])

            elif kind == DOCUMENT:
                descend = True

            else:
                yield self.unknown(rest[0])

            child = self.getFirstChild(node) if descend else None
            if child is not None:
                node = child
                continue

            # No child to visit: close this node, then move to the next
            # sibling, climbing (and closing ancestors) until one exists or
            # the root of the walk has been closed.
            while node is not None:
                info = self.getNodeDetails(node)
                kind, rest = info[0], info[1:]
                if kind == ELEMENT:
                    namespace, name, _attributes, _has_children = rest
                    is_void = ((not namespace or namespace == namespaces["html"]) and
                               name in voidElements)
                    if not is_void:
                        yield self.endTag(namespace, name)
                if node is self.tree:
                    node = None
                    break
                sibling = self.getNextSibling(node)
                if sibling is not None:
                    node = sibling
                    break
                node = self.getParentNode(node)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import absolute_import, division, unicode_literals
|
|
2
|
+
|
|
3
|
+
from xml.dom import Node
|
|
4
|
+
|
|
5
|
+
from . import base
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TreeWalker(base.NonRecursiveTreeWalker):
    """Tree walker for nodes exposing the ``xml.dom`` node interface"""

    def getNodeDetails(self, node):
        """Maps a DOM node to the ``(type, *details)`` tuple used by the walker

        :arg node: the DOM node to describe

        :returns: tuple starting with one of the ``base`` node-type constants

        """
        kind = node.nodeType

        if kind == Node.DOCUMENT_TYPE_NODE:
            return base.DOCTYPE, node.name, node.publicId, node.systemId

        if kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return base.TEXT, node.nodeValue

        if kind == Node.ELEMENT_NODE:
            # Attributes are keyed by (namespace, name); attributes without a
            # namespace use None and their plain name.
            attrs = {}
            for key in list(node.attributes.keys()):
                attr_node = node.getAttributeNode(key)
                if attr_node.namespaceURI:
                    qualified = (attr_node.namespaceURI, attr_node.localName)
                else:
                    qualified = (None, attr_node.name)
                attrs[qualified] = attr_node.value
            return (base.ELEMENT, node.namespaceURI, node.nodeName,
                    attrs, node.hasChildNodes())

        if kind == Node.COMMENT_NODE:
            return base.COMMENT, node.nodeValue

        if kind in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (base.DOCUMENT,)

        return base.UNKNOWN, kind

    def getFirstChild(self, node):
        """Returns ``node.firstChild``"""
        return node.firstChild

    def getNextSibling(self, node):
        """Returns ``node.nextSibling``"""
        return node.nextSibling

    def getParentNode(self, node):
        """Returns ``node.parentNode``"""
        return node.parentNode
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from __future__ import absolute_import, division, unicode_literals
|
|
2
|
+
|
|
3
|
+
from collections import OrderedDict
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
from tensorbored._vendor.bleach.six_shim import string_types
|
|
7
|
+
|
|
8
|
+
from . import base
|
|
9
|
+
from .._utils import moduleFactoryFactory
|
|
10
|
+
|
|
11
|
+
tag_regexp = re.compile("{([^}]*)}(.*)")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def getETreeBuilder(ElementTreeImplementation):
    """Builds the tree-walker namespace for one ElementTree implementation

    :arg ElementTreeImplementation: object providing a ``Comment`` factory
        whose result has a ``tag`` attribute

    :returns: ``locals()`` of this function, i.e. a dict holding the
        ``TreeWalker`` class together with the other names bound here

    """
    # NOTE: every name bound at this level ends up in the returned dict, so
    # no extra function-level locals are introduced.
    ElementTree = ElementTreeImplementation
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
        """Given the particular ElementTree representation, this implementation,
        to avoid using recursion, returns "nodes" as tuples with the following
        content:

        1. The current element

        2. The index of the element relative to its parent

        3. A stack of ancestor elements

        4. A flag "text", "tail" or None to indicate if the current node is a
           text node; either the text or tail of the current element (1)
        """
        def getNodeDetails(self, node):
            """Describes ``node`` as the ``(type, *details)`` tuple"""
            if isinstance(node, tuple):  # It might be the root Element
                elt, _, _, flag = node
                if flag in ("text", "tail"):
                    return base.TEXT, getattr(elt, flag)
                node = elt

            # Objects without a ``tag`` are asked for their root element.
            if not hasattr(node, "tag"):
                node = node.getroot()

            raw_tag = node.tag

            if raw_tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (base.DOCUMENT,)

            if raw_tag == "<!DOCTYPE>":
                return (base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            if raw_tag == ElementTreeCommentType:
                return base.COMMENT, node.text

            assert isinstance(raw_tag, string_types), type(raw_tag)
            # This is assumed to be an ordinary element
            parsed = tag_regexp.match(raw_tag)
            if parsed:
                namespace, local_name = parsed.groups()
            else:
                namespace, local_name = None, raw_tag

            attrs = OrderedDict()
            for attr_name, attr_value in list(node.attrib.items()):
                parsed = tag_regexp.match(attr_name)
                if parsed:
                    attr_key = (parsed.group(1), parsed.group(2))
                else:
                    attr_key = (None, attr_name)
                attrs[attr_key] = attr_value

            # The "has children" detail is truthy when the element has child
            # elements or non-empty text.
            return (base.ELEMENT, namespace, local_name,
                    attrs, len(node) or node.text)

        def getFirstChild(self, node):
            """Returns the element's text node, else its first child element,
            else ``None``"""
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                element, key, parents, flag = node, None, [], None

            if flag in ("text", "tail"):
                return None  # text nodes have no children
            if element.text:
                return element, key, parents, "text"
            if len(element):
                parents.append(element)
                return element[0], 0, parents, None
            return None

        def getNextSibling(self, node):
            """Returns the node following ``node`` under the same parent, or
            ``None``"""
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # The element's text is followed by its first child element.
                if not len(element):
                    return None
                parents.append(element)
                return element[0], 0, parents, None

            if element.tail and flag != "tail":
                return element, key, parents, "tail"
            if key < len(parents[-1]) - 1:
                return parents[-1][key + 1], key + 1, parents, None
            return None

        def getParentNode(self, node):
            """Returns the parent of ``node``, popping the ancestor stack for
            non-text nodes"""
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                if parents:
                    return element, key, parents, None
                return element

            parent = parents.pop()
            if not parents:
                return parent
            siblings = list(parents[-1])
            assert siblings.count(parent) == 1
            return parent, siblings.index(parent), parents, None

    return locals()
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
getETreeModule = moduleFactoryFactory(getETreeBuilder)
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
from __future__ import absolute_import, division, unicode_literals
|
|
2
|
+
from tensorbored._vendor.bleach.six_shim import text_type
|
|
3
|
+
|
|
4
|
+
from collections import OrderedDict
|
|
5
|
+
|
|
6
|
+
from lxml import etree
|
|
7
|
+
from ..treebuilders.etree import tag_regexp
|
|
8
|
+
|
|
9
|
+
from . import base
|
|
10
|
+
|
|
11
|
+
from .. import _ihatexml
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def ensure_str(s):
    """Returns ``s`` as text, decoding it as strict ASCII if necessary

    :arg s: ``None``, a ``text_type`` instance, or an object with a
        ``decode`` method

    :returns: ``s`` unchanged when it is ``None`` or already ``text_type``,
        otherwise ``s.decode("ascii", "strict")``

    """
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("ascii", "strict")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Root(object):
    """Synthetic root node placed above the top-level nodes of a tree

    ``children`` holds an optional ``Doctype`` followed by the root node and
    all of its siblings, in order.

    """
    def __init__(self, et):
        """Collects the top-level nodes of ``et``

        :arg et: a tree object (anything with ``getroot()``) or a node

        """
        self.elementtree = et
        self.children = []

        # Objects without ``docinfo`` raise AttributeError here and simply
        # get no doctype child.
        try:
            if et.docinfo.internalDTD:
                doctype = Doctype(self,
                                  ensure_str(et.docinfo.root_name),
                                  ensure_str(et.docinfo.public_id),
                                  ensure_str(et.docinfo.system_url))
                self.children.append(doctype)
        except AttributeError:
            pass

        # Fall back to treating ``et`` itself as the starting node.
        try:
            node = et.getroot()
        except AttributeError:
            node = et

        # Rewind to the first sibling ...
        previous = node.getprevious()
        while previous is not None:
            node = previous
            previous = node.getprevious()

        # ... then collect every sibling from there on.
        while node is not None:
            self.children.append(node)
            node = node.getnext()

        self.text = self.tail = None

    def __getitem__(self, key):
        """Returns ``self.children[key]``"""
        return self.children[key]

    def getnext(self):
        """The root has no sibling; always returns ``None``"""
        return None

    def __len__(self):
        """Always returns 1, regardless of how many children there are"""
        return 1
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class Doctype(object):
    """Doctype pseudo-node kept as a child of a root node"""

    def __init__(self, root_node, name, public_id, system_id):
        """Stores the doctype fields

        :arg root_node: the owning root; its ``children`` list is read by
            ``getnext``

        :arg name: stored as ``self.name``

        :arg public_id: stored as ``self.public_id``

        :arg system_id: stored as ``self.system_id``

        """
        self.text = self.tail = None

        self.root_node = root_node
        self.name, self.public_id, self.system_id = name, public_id, system_id

    def getnext(self):
        """Returns the root node's second child (``children[1]``)"""
        siblings = self.root_node.children
        return siblings[1]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class FragmentRoot(Root):
    """Root node for a fragment given as a sequence of top-level nodes"""

    def __init__(self, children):
        """Wraps each item of ``children`` in a ``FragmentWrapper``

        ``Root.__init__`` is not called, so only ``children``, ``text`` and
        ``tail`` are set.

        :arg children: iterable of the fragment's top-level nodes

        """
        wrapped = []
        for child in children:
            wrapped.append(FragmentWrapper(self, child))
        self.children = wrapped
        self.text = None
        self.tail = None

    def getnext(self):
        """The fragment root has no sibling; always returns ``None``"""
        return None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class FragmentWrapper(object):
    """Wrap one top-level fragment child so it can be walked like a node.

    The wrapper records its owning fragment root (``root_node``) and the
    wrapped object (``obj``). Sibling navigation is answered from the root's
    ``children`` list; any attribute the wrapper does not define itself is
    forwarded to ``obj``.
    """

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        # Mirror text/tail from the wrapped object when it has them,
        # normalised through ensure_str; otherwise expose None.
        if hasattr(self.obj, 'text'):
            self.text = ensure_str(self.obj.text)
        else:
            self.text = None
        if hasattr(self.obj, 'tail'):
            self.tail = ensure_str(self.obj.tail)
        else:
            self.tail = None

    def __getattr__(self, name):
        """Forward attributes not found on the wrapper to the wrapped object.

        Raises:
            AttributeError: if ``obj`` itself has not been set yet, or if the
                wrapped object lacks the attribute.
        """
        # __getattr__ only runs after normal lookup has failed. If ``obj`` is
        # the missing name (instance built via __new__, e.g. by copy/pickle),
        # reading self.obj below would re-enter this method until
        # RecursionError, so fail fast with the conventional AttributeError.
        if name == "obj":
            raise AttributeError(name)
        return getattr(self.obj, name)

    def getnext(self):
        """Return the wrapper following this one under the root, or None."""
        siblings = self.root_node.children
        idx = siblings.index(self)
        if idx < len(siblings) - 1:
            return siblings[idx + 1]
        else:
            return None

    def __getitem__(self, key):
        return self.obj[key]

    def __bool__(self):
        return bool(self.obj)

    def getparent(self):
        """Fragment children are top-level, so there is no parent."""
        return None

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class TreeWalker(base.NonRecursiveTreeWalker):
    """Non-recursive walker over an element tree or a list of fragment nodes.

    Text is represented as ``(element, "text")`` / ``(element, "tail")``
    tuples rather than as separate node objects.
    """

    def __init__(self, tree):
        # pylint:disable=redefined-variable-type
        is_fragment = isinstance(tree, list)
        # Remember the raw fragment children so getParentNode can stop there.
        self.fragmentChildren = set(tree) if is_fragment else set()
        wrapped = FragmentRoot(tree) if is_fragment else Root(tree)
        base.NonRecursiveTreeWalker.__init__(self, wrapped)
        self.filter = _ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        """Describe *node* as a tuple whose first item is a ``base`` node kind."""
        if isinstance(node, tuple):  # Text node
            owner, slot = node
            assert slot in ("text", "tail"), "Text nodes are text or tail, found %s" % slot
            return base.TEXT, ensure_str(getattr(owner, slot))

        if isinstance(node, Root):
            return (base.DOCUMENT,)

        if isinstance(node, Doctype):
            return base.DOCTYPE, node.name, node.public_id, node.system_id

        # A wrapped fragment child without a ``tag`` is treated as bare text.
        if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            return base.TEXT, ensure_str(node.obj)

        if node.tag == etree.Comment:
            return base.COMMENT, ensure_str(node.text)

        if node.tag == etree.Entity:
            return base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        # This is assumed to be an ordinary element
        raw_tag = ensure_str(node.tag)
        tag_match = tag_regexp.match(raw_tag)
        if tag_match:
            namespace, tag = tag_match.groups()
        else:
            namespace, tag = None, raw_tag

        # Attribute keys become (namespace, local name) pairs.
        attrs = OrderedDict()
        for raw_name, raw_value in list(node.attrib.items()):
            attr_name = ensure_str(raw_name)
            attr_value = ensure_str(raw_value)
            name_match = tag_regexp.match(attr_name)
            if name_match:
                attr_key = (name_match.group(1), name_match.group(2))
            else:
                attr_key = (None, attr_name)
            attrs[attr_key] = attr_value

        element_name = self.filter.fromXmlName(tag)
        # Truthy when the element has child elements or leading text.
        has_children = len(node) > 0 or node.text
        return (base.ELEMENT, namespace, element_name, attrs, has_children)

    def getFirstChild(self, node):
        """Return the leading text tuple if *node* has text, else its first child."""
        assert not isinstance(node, tuple), "Text nodes have no children"

        assert len(node) or node.text, "Node has no children"
        return (node, "text") if node.text else node[0]

    def getNextSibling(self, node):
        """Return what follows *node* in document order at the same level."""
        if not isinstance(node, tuple):
            # An element is followed by its tail text when present.
            return (node, "tail") if node.tail else node.getnext()

        # Text node
        owner, slot = node
        assert slot in ("text", "tail"), "Text nodes are text or tail, found %s" % slot
        if slot == "tail":
            return owner.getnext()

        # XXX: we cannot use a "bool(node) and node[0] or None" construct here
        # because node[0] might evaluate to False if it has no child element
        return owner[0] if len(owner) else None

    def getParentNode(self, node):
        """Return the parent of *node*, or None for a top-level fragment child."""
        if isinstance(node, tuple):  # Text node
            owner, slot = node
            assert slot in ("text", "tail"), "Text nodes are text or tail, found %s" % slot
            # Leading text belongs to its element; tail text shares the
            # element's own parent.
            return owner if slot == "text" else owner.getparent()

        if node in self.fragmentChildren:
            return None

        return node.getparent()
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from __future__ import absolute_import, division, unicode_literals
|
|
2
|
+
|
|
3
|
+
from genshi.core import QName
|
|
4
|
+
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
|
|
5
|
+
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
|
|
6
|
+
|
|
7
|
+
from . import base
|
|
8
|
+
|
|
9
|
+
from ..constants import voidElements, namespaces
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TreeWalker(base.TreeWalker):
    """Walker that converts a stream of Genshi events into walker tokens."""

    def __iter__(self):
        # Each event is held back one step so that ``tokens`` can also be
        # given the event that follows it.
        held = None
        for upcoming in self.tree:
            if held is not None:
                for token in self.tokens(held, upcoming):
                    yield token
            held = upcoming

        # Don't forget the final event!
        if held is not None:
            for token in self.tokens(held, None):
                yield token

    def tokens(self, event, next):
        """Yield the walker token(s) for *event*.

        *next* is the event that follows *event* in the stream, or ``None``
        when *event* is the last one.
        """
        kind, data, _ = event

        if kind == START:
            tag, attribs = data
            namespace, name = tag.namespace, tag.localname

            # Attribute keys become (namespace, local name) pairs.
            converted_attribs = {}
            for attr_key, attr_value in attribs:
                if isinstance(attr_key, QName):
                    pair = (attr_key.namespace, attr_key.localname)
                else:
                    pair = (None, attr_key)
                converted_attribs[pair] = attr_value

            if namespace == namespaces["html"] and name in voidElements:
                # A void element has "children" unless the very next event
                # is the END event for this same tag.
                has_children = (not next or next[0] != END or
                                next[1] != tag)
                for token in self.emptyTag(namespace, name, converted_attribs,
                                           has_children):
                    yield token
            else:
                yield self.startTag(namespace, name, converted_attribs)

        elif kind == END:
            name = data.localname
            namespace = data.namespace
            # HTML void elements were emitted as empty tags on START, so
            # they get no separate end tag.
            is_html_void = namespace == namespaces["html"] and name in voidElements
            if not is_html_void:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(data)

        elif kind == TEXT:
            for token in self.text(data):
                yield token

        elif kind == DOCTYPE:
            yield self.doctype(*data)

        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
                      START_CDATA, END_CDATA, PI):
            # These event kinds produce no tokens.
            pass

        else:
            yield self.unknown(kind)
|