weasyprint 67.0__py3-none-any.whl → 68.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weasyprint/__init__.py +35 -103
- weasyprint/__main__.py +107 -80
- weasyprint/css/__init__.py +4 -10
- weasyprint/css/functions.py +5 -0
- weasyprint/css/html5_ua.css +1 -1
- weasyprint/css/tokens.py +4 -1
- weasyprint/css/validation/properties.py +4 -4
- weasyprint/document.py +4 -64
- weasyprint/draw/text.py +4 -2
- weasyprint/formatting_structure/boxes.py +4 -1
- weasyprint/formatting_structure/build.py +111 -37
- weasyprint/images.py +27 -32
- weasyprint/layout/__init__.py +2 -1
- weasyprint/layout/grid.py +25 -14
- weasyprint/layout/page.py +4 -4
- weasyprint/layout/preferred.py +35 -2
- weasyprint/pdf/__init__.py +12 -1
- weasyprint/pdf/anchors.py +10 -16
- weasyprint/pdf/fonts.py +12 -3
- weasyprint/pdf/metadata.py +153 -98
- weasyprint/pdf/pdfa.py +1 -3
- weasyprint/pdf/pdfua.py +1 -3
- weasyprint/pdf/pdfx.py +1 -3
- weasyprint/pdf/stream.py +0 -2
- weasyprint/svg/__init__.py +51 -30
- weasyprint/svg/css.py +21 -4
- weasyprint/svg/defs.py +5 -3
- weasyprint/text/fonts.py +2 -3
- weasyprint/urls.py +272 -96
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/METADATA +2 -1
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/RECORD +34 -34
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/WHEEL +0 -0
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/entry_points.txt +0 -0
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/licenses/LICENSE +0 -0
weasyprint/pdf/stream.py
CHANGED
weasyprint/svg/__init__.py
CHANGED
|
@@ -155,6 +155,14 @@ class Node:
|
|
|
155
155
|
for name, value in declarations:
|
|
156
156
|
child.attrib[name] = value.strip()
|
|
157
157
|
|
|
158
|
+
# Expand
|
|
159
|
+
# TODO: simplified expanders, use CSS expander code instead.
|
|
160
|
+
if font := child.attrib.pop('font', None):
|
|
161
|
+
parts = font.strip().split(maxsplit=1)
|
|
162
|
+
if len(parts) == 2:
|
|
163
|
+
child.attrib['font-size'] = parts[0]
|
|
164
|
+
child.attrib['font-family'] = parts[1]
|
|
165
|
+
|
|
158
166
|
# Replace 'currentColor' value
|
|
159
167
|
for key in COLOR_ATTRIBUTES:
|
|
160
168
|
if child.get(key) == 'currentColor':
|
|
@@ -219,6 +227,8 @@ class Node:
|
|
|
219
227
|
|
|
220
228
|
def get_child(self, id_):
|
|
221
229
|
"""Get a child with given id in the whole child tree."""
|
|
230
|
+
if self._etree_node.find(f'.//*[@id="{id_}"]') is None:
|
|
231
|
+
return
|
|
222
232
|
for child in self:
|
|
223
233
|
if child.get('id') == id_:
|
|
224
234
|
return child
|
|
@@ -324,23 +334,52 @@ class Node:
|
|
|
324
334
|
svg.inner_diagonal = hypot(svg.inner_width, svg.inner_height) / sqrt(2)
|
|
325
335
|
|
|
326
336
|
|
|
337
|
+
class LazyDefs:
|
|
338
|
+
def __init__(self, name, svg):
|
|
339
|
+
self._name = name
|
|
340
|
+
self._svg = svg
|
|
341
|
+
self._data = {}
|
|
342
|
+
|
|
343
|
+
def __getitem__(self, name):
|
|
344
|
+
return self.get(name)
|
|
345
|
+
|
|
346
|
+
def get(self, name):
|
|
347
|
+
if not name:
|
|
348
|
+
return
|
|
349
|
+
if name in self._data:
|
|
350
|
+
return self._data[name]
|
|
351
|
+
node = self._svg.tree.get_child(name)
|
|
352
|
+
if node is not None and self._name in node.tag.lower():
|
|
353
|
+
self._data[name] = node
|
|
354
|
+
if self._name in ('gradient', 'pattern'):
|
|
355
|
+
self._svg.inherit_element(node, self)
|
|
356
|
+
else:
|
|
357
|
+
self._data[name] = None
|
|
358
|
+
return self._data[name]
|
|
359
|
+
|
|
360
|
+
def __contains__(self, name):
|
|
361
|
+
return self.get(name)
|
|
362
|
+
|
|
363
|
+
|
|
327
364
|
class SVG:
|
|
328
365
|
"""An SVG document."""
|
|
329
366
|
|
|
330
|
-
def __init__(self, tree, url, font_config):
|
|
367
|
+
def __init__(self, tree, url, font_config, url_fetcher=None):
|
|
331
368
|
wrapper = ElementWrapper.from_xml_root(tree)
|
|
332
|
-
style = parse_stylesheets(wrapper, url)
|
|
369
|
+
style = parse_stylesheets(wrapper, url, font_config, url_fetcher)
|
|
333
370
|
self.tree = Node(wrapper, style)
|
|
334
371
|
self.font_config = font_config
|
|
372
|
+
self.url_fetcher = url_fetcher
|
|
335
373
|
self.url = url
|
|
336
|
-
|
|
337
|
-
self.
|
|
338
|
-
self.
|
|
339
|
-
self.
|
|
340
|
-
self.
|
|
341
|
-
self.
|
|
342
|
-
self.
|
|
343
|
-
self.
|
|
374
|
+
|
|
375
|
+
self.filters = LazyDefs('filter', self)
|
|
376
|
+
self.gradients = LazyDefs('gradient', self)
|
|
377
|
+
self.images = LazyDefs('image', self)
|
|
378
|
+
self.markers = LazyDefs('marker', self)
|
|
379
|
+
self.masks = LazyDefs('mask', self)
|
|
380
|
+
self.patterns = LazyDefs('pattern', self)
|
|
381
|
+
self.paths = LazyDefs('path', self)
|
|
382
|
+
self.symbols = LazyDefs('symbol', self)
|
|
344
383
|
|
|
345
384
|
self.use_cache = {}
|
|
346
385
|
|
|
@@ -349,8 +388,6 @@ class SVG:
|
|
|
349
388
|
self.text_path_width = 0
|
|
350
389
|
|
|
351
390
|
self.tree.cascade(self.tree)
|
|
352
|
-
self.parse_defs(self.tree)
|
|
353
|
-
self.inherit_defs()
|
|
354
391
|
|
|
355
392
|
def get_intrinsic_size(self, font_size):
|
|
356
393
|
"""Get intrinsic size of the image."""
|
|
@@ -382,15 +419,13 @@ class SVG:
|
|
|
382
419
|
"""Compute size of an arbirtary attribute."""
|
|
383
420
|
return size(length, font_size, self.inner_diagonal)
|
|
384
421
|
|
|
385
|
-
def draw(self, stream, concrete_width, concrete_height, base_url,
|
|
386
|
-
url_fetcher, context):
|
|
422
|
+
def draw(self, stream, concrete_width, concrete_height, base_url, context):
|
|
387
423
|
"""Draw image on a stream."""
|
|
388
424
|
self.stream = stream
|
|
389
425
|
|
|
390
426
|
self.tree.set_svg_size(self, concrete_width, concrete_height)
|
|
391
427
|
|
|
392
428
|
self.base_url = base_url
|
|
393
|
-
self.url_fetcher = url_fetcher
|
|
394
429
|
self.context = context
|
|
395
430
|
|
|
396
431
|
self.draw_node(self.tree, size('12pt'))
|
|
@@ -796,20 +831,6 @@ class SVG:
|
|
|
796
831
|
if matrix.determinant:
|
|
797
832
|
self.stream.transform(*matrix.values)
|
|
798
833
|
|
|
799
|
-
def parse_defs(self, node):
|
|
800
|
-
"""Parse defs included in a tree."""
|
|
801
|
-
for def_type in DEF_TYPES:
|
|
802
|
-
if def_type in node.tag.lower() and 'id' in node.attrib:
|
|
803
|
-
getattr(self, f'{def_type}s')[node.attrib['id']] = node
|
|
804
|
-
for child in node:
|
|
805
|
-
self.parse_defs(child)
|
|
806
|
-
|
|
807
|
-
def inherit_defs(self):
|
|
808
|
-
"""Handle inheritance of different defined elements lists."""
|
|
809
|
-
for defs in (self.gradients, self.patterns):
|
|
810
|
-
for element in defs.values():
|
|
811
|
-
self.inherit_element(element, defs)
|
|
812
|
-
|
|
813
834
|
def inherit_element(self, element, defs):
|
|
814
835
|
"""Recursively handle inheritance of defined element."""
|
|
815
836
|
href = element.get_href(self.url)
|
|
@@ -840,7 +861,7 @@ class SVG:
|
|
|
840
861
|
class Pattern(SVG):
|
|
841
862
|
"""SVG node applied as a pattern."""
|
|
842
863
|
def __init__(self, tree, svg):
|
|
843
|
-
super().__init__(tree._etree_node, svg.url, svg.font_config)
|
|
864
|
+
super().__init__(tree._etree_node, svg.url, svg.font_config, svg.url_fetcher)
|
|
844
865
|
self.svg = svg
|
|
845
866
|
self.tree = tree
|
|
846
867
|
|
weasyprint/svg/css.py
CHANGED
|
@@ -5,11 +5,12 @@ from urllib.parse import urljoin
|
|
|
5
5
|
import cssselect2
|
|
6
6
|
import tinycss2
|
|
7
7
|
|
|
8
|
+
from ..css.validation.descriptors import preprocess_descriptors
|
|
8
9
|
from ..logger import LOGGER
|
|
9
10
|
from .utils import parse_url
|
|
10
11
|
|
|
11
12
|
|
|
12
|
-
def find_stylesheets_rules(tree, stylesheet_rules, url):
|
|
13
|
+
def find_stylesheets_rules(tree, stylesheet_rules, url, font_config, url_fetcher):
|
|
13
14
|
"""Find rules among stylesheet rules and imports."""
|
|
14
15
|
for rule in stylesheet_rules:
|
|
15
16
|
if rule.type == 'at-rule':
|
|
@@ -22,7 +23,22 @@ def find_stylesheets_rules(tree, stylesheet_rules, url):
|
|
|
22
23
|
stylesheet = tinycss2.parse_stylesheet(
|
|
23
24
|
tree.fetch_url(css_url, 'text/css').decode())
|
|
24
25
|
url = css_url.geturl()
|
|
25
|
-
yield from find_stylesheets_rules(
|
|
26
|
+
yield from find_stylesheets_rules(
|
|
27
|
+
tree, stylesheet, url, font_config, url_fetcher)
|
|
28
|
+
elif rule.lower_at_keyword == 'font-face':
|
|
29
|
+
if font_config is not None and url_fetcher is not None:
|
|
30
|
+
content = tinycss2.parse_blocks_contents(rule.content)
|
|
31
|
+
rule_descriptors = dict(
|
|
32
|
+
preprocess_descriptors('font-face', url, content))
|
|
33
|
+
for key in ('src', 'font_family'):
|
|
34
|
+
if key not in rule_descriptors:
|
|
35
|
+
LOGGER.warning(
|
|
36
|
+
"Missing %s descriptor in '@font-face' rule at "
|
|
37
|
+
"%d:%d", key.replace('_', '-'),
|
|
38
|
+
rule.source_line, rule.source_column)
|
|
39
|
+
break
|
|
40
|
+
else:
|
|
41
|
+
font_config.add_font_face(rule_descriptors, url_fetcher)
|
|
26
42
|
# TODO: support media types
|
|
27
43
|
# if rule.lower_at_keyword == 'media':
|
|
28
44
|
elif rule.type == 'qualified-rule':
|
|
@@ -49,7 +65,7 @@ def parse_declarations(input):
|
|
|
49
65
|
return normal_declarations, important_declarations
|
|
50
66
|
|
|
51
67
|
|
|
52
|
-
def parse_stylesheets(tree, url):
|
|
68
|
+
def parse_stylesheets(tree, url, font_config, url_fetcher):
|
|
53
69
|
"""Find stylesheets and return rule matchers in given tree."""
|
|
54
70
|
normal_matcher = cssselect2.Matcher()
|
|
55
71
|
important_matcher = cssselect2.Matcher()
|
|
@@ -70,7 +86,8 @@ def parse_stylesheets(tree, url):
|
|
|
70
86
|
|
|
71
87
|
# Parse rules and fill matchers
|
|
72
88
|
for stylesheet in stylesheets:
|
|
73
|
-
for rule in find_stylesheets_rules(
|
|
89
|
+
for rule in find_stylesheets_rules(
|
|
90
|
+
tree, stylesheet, url, font_config, url_fetcher):
|
|
74
91
|
normal_declarations, important_declarations = parse_declarations(
|
|
75
92
|
rule.content)
|
|
76
93
|
try:
|
weasyprint/svg/defs.py
CHANGED
|
@@ -102,10 +102,12 @@ def draw_gradient(svg, node, gradient, font_size, opacity, stroke):
|
|
|
102
102
|
return False
|
|
103
103
|
if gradient.get('gradientUnits') == 'userSpaceOnUse':
|
|
104
104
|
width, height = svg.inner_width, svg.inner_height
|
|
105
|
+
bx1, by1 = bounding_box[:2]
|
|
105
106
|
matrix = Matrix()
|
|
106
107
|
else:
|
|
107
108
|
width, height = 1, 1
|
|
108
109
|
e, f, a, d = bounding_box
|
|
110
|
+
bx1, by1 = 0, 0
|
|
109
111
|
matrix = Matrix(a=a, d=d, e=e, f=f)
|
|
110
112
|
|
|
111
113
|
spread = gradient.get('spreadMethod', 'pad')
|
|
@@ -180,10 +182,10 @@ def draw_gradient(svg, node, gradient, font_size, opacity, stroke):
|
|
|
180
182
|
if 0 not in (a0, a1) and (a0, a1) != (1, 1):
|
|
181
183
|
color_couples[i][2] = a0 / a1
|
|
182
184
|
|
|
183
|
-
bx1, by1 = 0, 0
|
|
184
185
|
if 'gradientTransform' in gradient.attrib:
|
|
186
|
+
bx2, by2 = bx1 + width, by1 + height
|
|
185
187
|
bx1, by1 = transform_matrix.invert.transform_point(bx1, by1)
|
|
186
|
-
bx2, by2 = transform_matrix.invert.transform_point(
|
|
188
|
+
bx2, by2 = transform_matrix.invert.transform_point(bx2, by2)
|
|
187
189
|
width, height = bx2 - bx1, by2 - by1
|
|
188
190
|
|
|
189
191
|
# Ensure that width and height are positive to please some PDF readers
|
|
@@ -457,7 +459,7 @@ def draw_pattern(svg, node, pattern, font_size, opacity, stroke):
|
|
|
457
459
|
group = stream_pattern.add_group(0, 0, pattern_width, pattern_height)
|
|
458
460
|
Pattern(pattern, svg).draw(
|
|
459
461
|
group, pattern_width, pattern_height, svg.base_url,
|
|
460
|
-
svg.
|
|
462
|
+
svg.context)
|
|
461
463
|
stream_pattern.draw_x_object(group.id)
|
|
462
464
|
svg.stream.set_color_space('Pattern', stroke=stroke)
|
|
463
465
|
svg.stream.set_color_special(stream_pattern.id, stroke=stroke)
|
weasyprint/text/fonts.py
CHANGED
|
@@ -167,9 +167,8 @@ class FontConfiguration:
|
|
|
167
167
|
|
|
168
168
|
# Get font content.
|
|
169
169
|
try:
|
|
170
|
-
with fetch(url_fetcher, url) as
|
|
171
|
-
|
|
172
|
-
font = result['string'] if string else result['file_obj'].read()
|
|
170
|
+
with fetch(url_fetcher, url) as response:
|
|
171
|
+
font = response.read()
|
|
173
172
|
except Exception as exception:
|
|
174
173
|
LOGGER.debug('Failed to load font at %r (%s)', url, exception)
|
|
175
174
|
continue
|
weasyprint/urls.py
CHANGED
|
@@ -6,11 +6,14 @@ import os.path
|
|
|
6
6
|
import re
|
|
7
7
|
import sys
|
|
8
8
|
import traceback
|
|
9
|
+
import warnings
|
|
9
10
|
import zlib
|
|
11
|
+
from email.message import EmailMessage
|
|
10
12
|
from gzip import GzipFile
|
|
13
|
+
from io import BytesIO, StringIO
|
|
11
14
|
from pathlib import Path
|
|
15
|
+
from urllib import request
|
|
12
16
|
from urllib.parse import quote, unquote, urljoin, urlsplit
|
|
13
|
-
from urllib.request import Request, pathname2url, url2pathname, urlopen
|
|
14
17
|
|
|
15
18
|
from . import __version__
|
|
16
19
|
from .logger import LOGGER
|
|
@@ -55,8 +58,7 @@ def iri_to_uri(url):
|
|
|
55
58
|
# Data URIs can be huge, but don’t need this anyway.
|
|
56
59
|
return url
|
|
57
60
|
# Use UTF-8 as per RFC 3987 (IRI), except for file://
|
|
58
|
-
url = url.encode(
|
|
59
|
-
FILESYSTEM_ENCODING if url.startswith('file:') else 'utf-8')
|
|
61
|
+
url = url.encode(FILESYSTEM_ENCODING if url.startswith('file:') else 'utf-8')
|
|
60
62
|
# This is a full URI, not just a component. Only %-encode characters
|
|
61
63
|
# that are not allowed at all in URIs. Everthing else is "safe":
|
|
62
64
|
# * Reserved characters: /:?#[]@!$&'()*+,;=
|
|
@@ -85,7 +87,7 @@ def path2url(path):
|
|
|
85
87
|
# Otherwise relative URIs are resolved from the parent directory.
|
|
86
88
|
path += os.path.sep
|
|
87
89
|
wants_trailing_slash = True
|
|
88
|
-
path = pathname2url(path)
|
|
90
|
+
path = request.pathname2url(path)
|
|
89
91
|
# On Windows pathname2url cuts off trailing slash
|
|
90
92
|
if wants_trailing_slash and not path.endswith('/'):
|
|
91
93
|
path += '/' # pragma: no cover
|
|
@@ -191,114 +193,288 @@ def default_url_fetcher(url, timeout=10, ssl_context=None, http_headers=None,
|
|
|
191
193
|
allowed_protocols=None):
|
|
192
194
|
"""Fetch an external resource such as an image or stylesheet.
|
|
193
195
|
|
|
194
|
-
|
|
195
|
-
``url_fetcher`` argument to :class:`HTML` or :class:`CSS`.
|
|
196
|
-
(See :ref:`URL Fetchers`.)
|
|
197
|
-
|
|
198
|
-
:param str url:
|
|
199
|
-
The URL of the resource to fetch.
|
|
200
|
-
:param int timeout:
|
|
201
|
-
The number of seconds before HTTP requests are dropped.
|
|
202
|
-
:param ssl.SSLContext ssl_context:
|
|
203
|
-
An SSL context used for HTTP requests.
|
|
204
|
-
:param dict http_headers:
|
|
205
|
-
Additional HTTP headers used for HTTP requests.
|
|
206
|
-
:param set allowed_protocols:
|
|
207
|
-
A set of authorized protocols.
|
|
208
|
-
:raises: An exception indicating failure, e.g. :obj:`ValueError` on
|
|
209
|
-
syntactically invalid URL.
|
|
210
|
-
:returns: A :obj:`dict` with the following keys:
|
|
211
|
-
|
|
212
|
-
* One of ``string`` (a :obj:`bytestring <bytes>`) or ``file_obj``
|
|
213
|
-
(a :term:`file object`).
|
|
214
|
-
* Optionally: ``mime_type``, a MIME type extracted e.g. from a
|
|
215
|
-
*Content-Type* header. If not provided, the type is guessed from the
|
|
216
|
-
file extension in the URL.
|
|
217
|
-
* Optionally: ``encoding``, a character encoding extracted e.g. from a
|
|
218
|
-
*charset* parameter in a *Content-Type* header
|
|
219
|
-
* Optionally: ``redirected_url``, the actual URL of the resource
|
|
220
|
-
if there were e.g. HTTP redirects.
|
|
221
|
-
* Optionally: ``filename``, the filename of the resource. Usually
|
|
222
|
-
derived from the *filename* parameter in a *Content-Disposition*
|
|
223
|
-
header.
|
|
224
|
-
* Optionally: ``path``, the path of the resource if it is stored on the
|
|
225
|
-
local filesystem.
|
|
226
|
-
|
|
227
|
-
If a ``file_obj`` key is given, it is the caller’s responsibility
|
|
228
|
-
to call ``file_obj.close()``. The default function used internally to
|
|
229
|
-
fetch data in WeasyPrint tries to close the file object after
|
|
230
|
-
retreiving; but if this URL fetcher is used elsewhere, the file object
|
|
231
|
-
has to be closed manually.
|
|
196
|
+
This function is deprecated, use ``URLFetcher`` instead.
|
|
232
197
|
|
|
233
198
|
"""
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
199
|
+
warnings.warn(
|
|
200
|
+
"default_url_fetcher is deprecated and will be removed in WeasyPrint 69.0, "
|
|
201
|
+
"please use URLFetcher instead. For security reasons, HTTP redirects are not "
|
|
202
|
+
"supported anymore with default_url_fetcher, but are with URLFetcher.\n\nSee "
|
|
203
|
+
"https://doc.courtbouillon.org/weasyprint/stable/first_steps.html#url-fetchers",
|
|
204
|
+
category=DeprecationWarning)
|
|
205
|
+
fetcher = URLFetcher(
|
|
206
|
+
timeout, ssl_context, http_headers, allowed_protocols, allow_redirects=False)
|
|
207
|
+
return fetcher.fetch(url)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
@contextlib.contextmanager
|
|
211
|
+
def select_source(guess=None, filename=None, url=None, file_obj=None, string=None,
|
|
212
|
+
base_url=None, url_fetcher=None, check_css_mime_type=False):
|
|
213
|
+
"""If only one input is given, return it.
|
|
214
|
+
|
|
215
|
+
Yield a file object, the base url, the protocol encoding and the protocol mime-type.
|
|
216
|
+
|
|
217
|
+
"""
|
|
218
|
+
if base_url is not None:
|
|
219
|
+
base_url = ensure_url(base_url)
|
|
220
|
+
if url_fetcher is None:
|
|
221
|
+
url_fetcher = URLFetcher()
|
|
222
|
+
|
|
223
|
+
selected_params = [
|
|
224
|
+
param for param in (guess, filename, url, file_obj, string) if
|
|
225
|
+
param is not None]
|
|
226
|
+
if len(selected_params) != 1:
|
|
227
|
+
source = ', '.join(selected_params) or 'nothing'
|
|
228
|
+
raise TypeError(f'Expected exactly one source, got {source}')
|
|
229
|
+
elif guess is not None:
|
|
230
|
+
kwargs = {
|
|
231
|
+
'base_url': base_url,
|
|
232
|
+
'url_fetcher': url_fetcher,
|
|
233
|
+
'check_css_mime_type': check_css_mime_type,
|
|
234
|
+
}
|
|
235
|
+
if hasattr(guess, 'read'):
|
|
236
|
+
kwargs['file_obj'] = guess
|
|
237
|
+
elif isinstance(guess, Path):
|
|
238
|
+
kwargs['filename'] = guess
|
|
239
|
+
elif url_is_absolute(guess):
|
|
240
|
+
kwargs['url'] = guess
|
|
241
|
+
else:
|
|
242
|
+
kwargs['filename'] = guess
|
|
243
|
+
result = select_source(**kwargs)
|
|
244
|
+
with result as result:
|
|
245
|
+
yield result
|
|
246
|
+
elif filename is not None:
|
|
247
|
+
if base_url is None:
|
|
248
|
+
base_url = path2url(filename)
|
|
249
|
+
with open(filename, 'rb') as file_obj:
|
|
250
|
+
yield file_obj, base_url, None, None
|
|
251
|
+
elif url is not None:
|
|
252
|
+
with fetch(url_fetcher, url) as response:
|
|
253
|
+
if check_css_mime_type and response.content_type != 'text/css':
|
|
254
|
+
LOGGER.error(
|
|
255
|
+
f'Unsupported stylesheet type {response.content_type} '
|
|
256
|
+
f'for {response.url}')
|
|
257
|
+
yield StringIO(''), base_url, None, None
|
|
258
|
+
else:
|
|
259
|
+
if base_url is None:
|
|
260
|
+
base_url = response.url
|
|
261
|
+
yield response, base_url, response.charset, response.content_type
|
|
262
|
+
elif file_obj is not None:
|
|
263
|
+
if base_url is None:
|
|
264
|
+
# filesystem file-like objects have a 'name' attribute.
|
|
265
|
+
name = getattr(file_obj, 'name', None)
|
|
266
|
+
# Some streams have a .name like '<stdin>', not a filename.
|
|
267
|
+
if name and not name.startswith('<'):
|
|
268
|
+
base_url = ensure_url(name)
|
|
269
|
+
yield file_obj, base_url, None, None
|
|
270
|
+
else:
|
|
271
|
+
if isinstance(string, str):
|
|
272
|
+
yield StringIO(string), base_url, None, None
|
|
273
|
+
else:
|
|
274
|
+
yield BytesIO(string), base_url, None, None
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class URLFetchingError(IOError):
|
|
278
|
+
"""Some error happened when fetching an URL."""
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class FatalURLFetchingError(BaseException):
|
|
282
|
+
"""Some error happened when fetching an URL and must stop the rendering."""
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class URLFetcher(request.OpenerDirector):
|
|
286
|
+
"""Fetcher of external resources such as images or stylesheets.
|
|
287
|
+
|
|
288
|
+
:param int timeout: The number of seconds before HTTP requests are dropped.
|
|
289
|
+
:param ssl.SSLContext ssl_context: An SSL context used for HTTPS requests.
|
|
290
|
+
:param dict http_headers: Additional HTTP headers used for HTTP requests.
|
|
291
|
+
:type allowed_protocols: :term:`sequence`
|
|
292
|
+
:param allowed_protocols: A set of authorized protocols, :obj:`None` means all.
|
|
293
|
+
:param bool allow_redirects: Whether HTTP redirects must be followed.
|
|
294
|
+
:param bool fail_on_errors: Whether HTTP errors should stop the rendering.
|
|
295
|
+
|
|
296
|
+
Another class inheriting from this class, with a ``fetch`` method that has a
|
|
297
|
+
compatible signature, can be given as the ``url_fetcher`` argument to
|
|
298
|
+
:class:`weasyprint.HTML` or :class:`weasyprint.CSS`.
|
|
299
|
+
|
|
300
|
+
See :ref:`URL Fetchers` for more information and examples.
|
|
301
|
+
|
|
302
|
+
"""
|
|
303
|
+
|
|
304
|
+
def __init__(self, timeout=10, ssl_context=None, http_headers=None,
|
|
305
|
+
allowed_protocols=None, allow_redirects=True, fail_on_errors=False,
|
|
306
|
+
**kwargs):
|
|
307
|
+
super().__init__()
|
|
308
|
+
handlers = [
|
|
309
|
+
request.ProxyHandler(), request.UnknownHandler(), request.HTTPHandler(),
|
|
310
|
+
request.HTTPDefaultErrorHandler(), request.FTPHandler(),
|
|
311
|
+
request.FileHandler(), request.HTTPErrorProcessor(), request.DataHandler(),
|
|
312
|
+
request.HTTPSHandler(context=ssl_context)]
|
|
313
|
+
if allow_redirects:
|
|
314
|
+
handlers.append(request.HTTPRedirectHandler())
|
|
315
|
+
for handler in handlers:
|
|
316
|
+
self.add_handler(handler)
|
|
317
|
+
|
|
318
|
+
self._timeout = timeout
|
|
319
|
+
self._http_headers = {**HTTP_HEADERS, **(http_headers or {})}
|
|
320
|
+
self._allowed_protocols = allowed_protocols
|
|
321
|
+
self._fail_on_errors = fail_on_errors
|
|
322
|
+
|
|
323
|
+
def fetch(self, url, headers=None):
|
|
324
|
+
"""Fetch a given URL.
|
|
325
|
+
|
|
326
|
+
:returns: A :obj:`URLFetcherResponse` instance.
|
|
327
|
+
:raises: An exception indicating failure, e.g. :obj:`ValueError` on
|
|
328
|
+
syntactically invalid URL. All exceptions are catched internally by
|
|
329
|
+
WeasyPrint, except when they inherit from :obj:`FatalURLFetchingError`.
|
|
330
|
+
|
|
331
|
+
"""
|
|
332
|
+
# Discard URLs with no or invalid protocol.
|
|
333
|
+
if not UNICODE_SCHEME_RE.match(url): # pragma: no cover
|
|
334
|
+
raise ValueError(f'Not an absolute URI: {url}')
|
|
335
|
+
|
|
336
|
+
# Discard URLs with forbidden protocol.
|
|
337
|
+
if self._allowed_protocols is not None:
|
|
338
|
+
if url.split('://', 1)[0].lower() not in self._allowed_protocols:
|
|
237
339
|
raise ValueError(f'URI uses disallowed protocol: {url}')
|
|
238
340
|
|
|
239
|
-
#
|
|
341
|
+
# Remove query and fragment parts from file URLs.
|
|
342
|
+
# See https://bugs.python.org/issue34702.
|
|
240
343
|
if url.lower().startswith('file://'):
|
|
241
344
|
url = url.split('?')[0]
|
|
242
|
-
path = url2pathname(url.removeprefix('file:'))
|
|
243
|
-
else:
|
|
244
|
-
path = None
|
|
245
345
|
|
|
346
|
+
# Transform Unicode IRI to ASCII URI.
|
|
246
347
|
url = iri_to_uri(url)
|
|
247
|
-
|
|
248
|
-
|
|
348
|
+
|
|
349
|
+
# Open URL.
|
|
350
|
+
headers = {**self._http_headers, **(headers or {})}
|
|
351
|
+
http_request = request.Request(url, headers=headers)
|
|
352
|
+
response = super().open(http_request, timeout=self._timeout)
|
|
353
|
+
|
|
354
|
+
# Decompress response.
|
|
355
|
+
body = response
|
|
356
|
+
if 'Content-Encoding' in response.headers:
|
|
357
|
+
content_encoding = response.headers['Content-Encoding']
|
|
358
|
+
del response.headers['Content-Encoding']
|
|
359
|
+
if content_encoding == 'gzip':
|
|
360
|
+
body = StreamingGzipFile(fileobj=response)
|
|
361
|
+
elif content_encoding == 'deflate':
|
|
362
|
+
data = response.read()
|
|
363
|
+
try:
|
|
364
|
+
body = zlib.decompress(data)
|
|
365
|
+
except zlib.error:
|
|
366
|
+
# Try without zlib header or checksum.
|
|
367
|
+
body = zlib.decompress(data, -15)
|
|
368
|
+
|
|
369
|
+
return URLFetcherResponse(response.url, body, response.headers, response.status)
|
|
370
|
+
|
|
371
|
+
def open(self, url, data=None, timeout=None):
|
|
372
|
+
if isinstance(url, request.Request):
|
|
373
|
+
return self.fetch(url.full_url, url.headers)
|
|
374
|
+
return self.fetch(url)
|
|
375
|
+
|
|
376
|
+
def __call__(self, url):
|
|
377
|
+
return self.fetch(url)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
class URLFetcherResponse:
|
|
381
|
+
"""The HTTP response of an URL fetcher.
|
|
382
|
+
|
|
383
|
+
:param str url: The URL of the HTTP response.
|
|
384
|
+
:type body: :class:`str`, :class:`bytes` or :term:`file object`
|
|
385
|
+
:param body: The body of the HTTP response.
|
|
386
|
+
:type headers: dict or email.message.EmailMessage
|
|
387
|
+
:param headers: The headers of the HTTP response.
|
|
388
|
+
:param str status: The status of the HTTP response.
|
|
389
|
+
|
|
390
|
+
Has the same interface as :class:`urllib.response.addinfourl`.
|
|
391
|
+
|
|
392
|
+
If a :term:`file object` is given for the body, it is the caller’s responsibility to
|
|
393
|
+
call ``close()`` on it. The default function used internally to fetch data in
|
|
394
|
+
WeasyPrint tries to close the file object after retreiving; but if this URL fetcher
|
|
395
|
+
is used elsewhere, the file object has to be closed manually.
|
|
396
|
+
|
|
397
|
+
"""
|
|
398
|
+
def __init__(self, url, body=None, headers=None, status='200 OK', **kwargs):
|
|
399
|
+
self.url = url
|
|
400
|
+
self.status = status
|
|
401
|
+
|
|
402
|
+
if isinstance(headers, EmailMessage):
|
|
403
|
+
self.headers = headers
|
|
249
404
|
else:
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
'path': path,
|
|
260
|
-
}
|
|
261
|
-
content_encoding = response.headers.get('Content-Encoding')
|
|
262
|
-
if content_encoding == 'gzip':
|
|
263
|
-
result['file_obj'] = StreamingGzipFile(fileobj=response)
|
|
264
|
-
elif content_encoding == 'deflate':
|
|
265
|
-
data = response.read()
|
|
266
|
-
try:
|
|
267
|
-
result['string'] = zlib.decompress(data)
|
|
268
|
-
except zlib.error:
|
|
269
|
-
# Try without zlib header or checksum
|
|
270
|
-
result['string'] = zlib.decompress(data, -15)
|
|
405
|
+
self.headers = EmailMessage()
|
|
406
|
+
for key, value in (headers or {}).items():
|
|
407
|
+
self.headers[key] = value
|
|
408
|
+
|
|
409
|
+
if hasattr(body, 'read'):
|
|
410
|
+
self._file_obj = body
|
|
411
|
+
elif isinstance(body, str):
|
|
412
|
+
self.headers.set_param('charset', 'utf-8')
|
|
413
|
+
self._file_obj = BytesIO(body.encode('utf-8'))
|
|
271
414
|
else:
|
|
272
|
-
|
|
273
|
-
return result
|
|
274
|
-
else: # pragma: no cover
|
|
275
|
-
raise ValueError(f'Not an absolute URI: {url}')
|
|
415
|
+
self._file_obj = BytesIO(body)
|
|
276
416
|
|
|
417
|
+
def read(self, *args, **kwargs):
|
|
418
|
+
return self._file_obj.read(*args, **kwargs)
|
|
277
419
|
|
|
278
|
-
|
|
279
|
-
|
|
420
|
+
def close(self):
|
|
421
|
+
try:
|
|
422
|
+
self._file_obj.close()
|
|
423
|
+
except Exception: # pragma: no cover
|
|
424
|
+
# May already be closed or something.
|
|
425
|
+
# This is just cleanup anyway: log but make it non-fatal.
|
|
426
|
+
LOGGER.warning(
|
|
427
|
+
'Error when closing stream for %s:\n%s',
|
|
428
|
+
self.url, traceback.format_exc())
|
|
429
|
+
|
|
430
|
+
@property
|
|
431
|
+
def path(self):
|
|
432
|
+
if self.url.startswith('file:'):
|
|
433
|
+
return request.url2pathname(self.url.split('?')[0].removeprefix('file:'))
|
|
434
|
+
|
|
435
|
+
@property
|
|
436
|
+
def content_type(self):
|
|
437
|
+
return self.headers.get_content_type()
|
|
438
|
+
|
|
439
|
+
@property
|
|
440
|
+
def charset(self):
|
|
441
|
+
return self.headers.get_param('charset')
|
|
280
442
|
|
|
281
443
|
|
|
282
444
|
@contextlib.contextmanager
|
|
283
445
|
def fetch(url_fetcher, url):
|
|
284
|
-
"""
|
|
446
|
+
"""Fetch an ``url`` with ```url_fetcher``, fill in optional data, and clean up.
|
|
447
|
+
|
|
448
|
+
Fatal errors must raise a ``FatalURLFetchingError`` that stops the rendering. All
|
|
449
|
+
other exceptions are catched and raise an ``URLFetchingError``, that is usually
|
|
450
|
+
catched by the code that fetches the resource and emits a warning.
|
|
451
|
+
|
|
452
|
+
"""
|
|
285
453
|
try:
|
|
286
|
-
|
|
454
|
+
resource = url_fetcher(url)
|
|
287
455
|
except Exception as exception:
|
|
456
|
+
if getattr(url_fetcher, '_fail_on_errors', False):
|
|
457
|
+
raise FatalURLFetchingError(f'Error fetching "{url}"') from exception
|
|
288
458
|
raise URLFetchingError(f'{type(exception).__name__}: {exception}')
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
459
|
+
|
|
460
|
+
if isinstance(resource, dict):
|
|
461
|
+
warnings.warn(
|
|
462
|
+
"Returning dicts in URL fetchers is deprecated and will be removed "
|
|
463
|
+
"in WeasyPrint 69.0, please return URLFetcherResponse instead.",
|
|
464
|
+
category=DeprecationWarning)
|
|
465
|
+
if 'url' not in resource:
|
|
466
|
+
resource['url'] = resource.get('redirected_url', url)
|
|
467
|
+
resource['body'] = resource.get('file_obj', resource.get('string'))
|
|
468
|
+
content_type = resource.get('mime_type', 'application/octet-stream')
|
|
469
|
+
if charset := resource.get('encoding'):
|
|
470
|
+
content_type += f';{charset}'
|
|
471
|
+
resource['headers'] = {'Content-Type': content_type}
|
|
472
|
+
resource = URLFetcherResponse(**resource)
|
|
473
|
+
|
|
474
|
+
assert isinstance(resource, URLFetcherResponse), (
|
|
475
|
+
'URL fetcher must return either a dict or a URLFetcherResponse instance')
|
|
476
|
+
|
|
477
|
+
try:
|
|
478
|
+
yield resource
|
|
479
|
+
finally:
|
|
480
|
+
resource.close()
|