weasyprint 67.0__py3-none-any.whl → 68.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
weasyprint/pdf/stream.py CHANGED
@@ -201,8 +201,6 @@ class Stream(pydyf.Stream):
201
201
  'Group': pydyf.Dictionary({
202
202
  'Type': '/Group',
203
203
  'S': '/Transparency',
204
- 'I': 'true',
205
- 'CS': '/DeviceRGB',
206
204
  }),
207
205
  })
208
206
  group = self.clone(resources=resources, extra=extra)
@@ -155,6 +155,14 @@ class Node:
155
155
  for name, value in declarations:
156
156
  child.attrib[name] = value.strip()
157
157
 
158
+ # Expand
159
+ # TODO: simplified expanders, use CSS expander code instead.
160
+ if font := child.attrib.pop('font', None):
161
+ parts = font.strip().split(maxsplit=1)
162
+ if len(parts) == 2:
163
+ child.attrib['font-size'] = parts[0]
164
+ child.attrib['font-family'] = parts[1]
165
+
158
166
  # Replace 'currentColor' value
159
167
  for key in COLOR_ATTRIBUTES:
160
168
  if child.get(key) == 'currentColor':
@@ -219,6 +227,8 @@ class Node:
219
227
 
220
228
  def get_child(self, id_):
221
229
  """Get a child with given id in the whole child tree."""
230
+ if self._etree_node.find(f'.//*[@id="{id_}"]') is None:
231
+ return
222
232
  for child in self:
223
233
  if child.get('id') == id_:
224
234
  return child
@@ -324,23 +334,52 @@ class Node:
324
334
  svg.inner_diagonal = hypot(svg.inner_width, svg.inner_height) / sqrt(2)
325
335
 
326
336
 
337
+ class LazyDefs:
338
+ def __init__(self, name, svg):
339
+ self._name = name
340
+ self._svg = svg
341
+ self._data = {}
342
+
343
+ def __getitem__(self, name):
344
+ return self.get(name)
345
+
346
+ def get(self, name):
347
+ if not name:
348
+ return
349
+ if name in self._data:
350
+ return self._data[name]
351
+ node = self._svg.tree.get_child(name)
352
+ if node is not None and self._name in node.tag.lower():
353
+ self._data[name] = node
354
+ if self._name in ('gradient', 'pattern'):
355
+ self._svg.inherit_element(node, self)
356
+ else:
357
+ self._data[name] = None
358
+ return self._data[name]
359
+
360
+ def __contains__(self, name):
361
+ return self.get(name)
362
+
363
+
327
364
  class SVG:
328
365
  """An SVG document."""
329
366
 
330
- def __init__(self, tree, url, font_config):
367
+ def __init__(self, tree, url, font_config, url_fetcher=None):
331
368
  wrapper = ElementWrapper.from_xml_root(tree)
332
- style = parse_stylesheets(wrapper, url)
369
+ style = parse_stylesheets(wrapper, url, font_config, url_fetcher)
333
370
  self.tree = Node(wrapper, style)
334
371
  self.font_config = font_config
372
+ self.url_fetcher = url_fetcher
335
373
  self.url = url
336
- self.filters = {}
337
- self.gradients = {}
338
- self.images = {}
339
- self.markers = {}
340
- self.masks = {}
341
- self.patterns = {}
342
- self.paths = {}
343
- self.symbols = {}
374
+
375
+ self.filters = LazyDefs('filter', self)
376
+ self.gradients = LazyDefs('gradient', self)
377
+ self.images = LazyDefs('image', self)
378
+ self.markers = LazyDefs('marker', self)
379
+ self.masks = LazyDefs('mask', self)
380
+ self.patterns = LazyDefs('pattern', self)
381
+ self.paths = LazyDefs('path', self)
382
+ self.symbols = LazyDefs('symbol', self)
344
383
 
345
384
  self.use_cache = {}
346
385
 
@@ -349,8 +388,6 @@ class SVG:
349
388
  self.text_path_width = 0
350
389
 
351
390
  self.tree.cascade(self.tree)
352
- self.parse_defs(self.tree)
353
- self.inherit_defs()
354
391
 
355
392
  def get_intrinsic_size(self, font_size):
356
393
  """Get intrinsic size of the image."""
@@ -382,15 +419,13 @@ class SVG:
382
419
  """Compute size of an arbitrary attribute."""
383
420
  return size(length, font_size, self.inner_diagonal)
384
421
 
385
- def draw(self, stream, concrete_width, concrete_height, base_url,
386
- url_fetcher, context):
422
+ def draw(self, stream, concrete_width, concrete_height, base_url, context):
387
423
  """Draw image on a stream."""
388
424
  self.stream = stream
389
425
 
390
426
  self.tree.set_svg_size(self, concrete_width, concrete_height)
391
427
 
392
428
  self.base_url = base_url
393
- self.url_fetcher = url_fetcher
394
429
  self.context = context
395
430
 
396
431
  self.draw_node(self.tree, size('12pt'))
@@ -796,20 +831,6 @@ class SVG:
796
831
  if matrix.determinant:
797
832
  self.stream.transform(*matrix.values)
798
833
 
799
- def parse_defs(self, node):
800
- """Parse defs included in a tree."""
801
- for def_type in DEF_TYPES:
802
- if def_type in node.tag.lower() and 'id' in node.attrib:
803
- getattr(self, f'{def_type}s')[node.attrib['id']] = node
804
- for child in node:
805
- self.parse_defs(child)
806
-
807
- def inherit_defs(self):
808
- """Handle inheritance of different defined elements lists."""
809
- for defs in (self.gradients, self.patterns):
810
- for element in defs.values():
811
- self.inherit_element(element, defs)
812
-
813
834
  def inherit_element(self, element, defs):
814
835
  """Recursively handle inheritance of defined element."""
815
836
  href = element.get_href(self.url)
@@ -840,7 +861,7 @@ class SVG:
840
861
  class Pattern(SVG):
841
862
  """SVG node applied as a pattern."""
842
863
  def __init__(self, tree, svg):
843
- super().__init__(tree._etree_node, svg.url, svg.font_config)
864
+ super().__init__(tree._etree_node, svg.url, svg.font_config, svg.url_fetcher)
844
865
  self.svg = svg
845
866
  self.tree = tree
846
867
 
weasyprint/svg/css.py CHANGED
@@ -5,11 +5,12 @@ from urllib.parse import urljoin
5
5
  import cssselect2
6
6
  import tinycss2
7
7
 
8
+ from ..css.validation.descriptors import preprocess_descriptors
8
9
  from ..logger import LOGGER
9
10
  from .utils import parse_url
10
11
 
11
12
 
12
- def find_stylesheets_rules(tree, stylesheet_rules, url):
13
+ def find_stylesheets_rules(tree, stylesheet_rules, url, font_config, url_fetcher):
13
14
  """Find rules among stylesheet rules and imports."""
14
15
  for rule in stylesheet_rules:
15
16
  if rule.type == 'at-rule':
@@ -22,7 +23,22 @@ def find_stylesheets_rules(tree, stylesheet_rules, url):
22
23
  stylesheet = tinycss2.parse_stylesheet(
23
24
  tree.fetch_url(css_url, 'text/css').decode())
24
25
  url = css_url.geturl()
25
- yield from find_stylesheets_rules(tree, stylesheet, url)
26
+ yield from find_stylesheets_rules(
27
+ tree, stylesheet, url, font_config, url_fetcher)
28
+ elif rule.lower_at_keyword == 'font-face':
29
+ if font_config is not None and url_fetcher is not None:
30
+ content = tinycss2.parse_blocks_contents(rule.content)
31
+ rule_descriptors = dict(
32
+ preprocess_descriptors('font-face', url, content))
33
+ for key in ('src', 'font_family'):
34
+ if key not in rule_descriptors:
35
+ LOGGER.warning(
36
+ "Missing %s descriptor in '@font-face' rule at "
37
+ "%d:%d", key.replace('_', '-'),
38
+ rule.source_line, rule.source_column)
39
+ break
40
+ else:
41
+ font_config.add_font_face(rule_descriptors, url_fetcher)
26
42
  # TODO: support media types
27
43
  # if rule.lower_at_keyword == 'media':
28
44
  elif rule.type == 'qualified-rule':
@@ -49,7 +65,7 @@ def parse_declarations(input):
49
65
  return normal_declarations, important_declarations
50
66
 
51
67
 
52
- def parse_stylesheets(tree, url):
68
+ def parse_stylesheets(tree, url, font_config, url_fetcher):
53
69
  """Find stylesheets and return rule matchers in given tree."""
54
70
  normal_matcher = cssselect2.Matcher()
55
71
  important_matcher = cssselect2.Matcher()
@@ -70,7 +86,8 @@ def parse_stylesheets(tree, url):
70
86
 
71
87
  # Parse rules and fill matchers
72
88
  for stylesheet in stylesheets:
73
- for rule in find_stylesheets_rules(tree, stylesheet, url):
89
+ for rule in find_stylesheets_rules(
90
+ tree, stylesheet, url, font_config, url_fetcher):
74
91
  normal_declarations, important_declarations = parse_declarations(
75
92
  rule.content)
76
93
  try:
weasyprint/svg/defs.py CHANGED
@@ -102,10 +102,12 @@ def draw_gradient(svg, node, gradient, font_size, opacity, stroke):
102
102
  return False
103
103
  if gradient.get('gradientUnits') == 'userSpaceOnUse':
104
104
  width, height = svg.inner_width, svg.inner_height
105
+ bx1, by1 = bounding_box[:2]
105
106
  matrix = Matrix()
106
107
  else:
107
108
  width, height = 1, 1
108
109
  e, f, a, d = bounding_box
110
+ bx1, by1 = 0, 0
109
111
  matrix = Matrix(a=a, d=d, e=e, f=f)
110
112
 
111
113
  spread = gradient.get('spreadMethod', 'pad')
@@ -180,10 +182,10 @@ def draw_gradient(svg, node, gradient, font_size, opacity, stroke):
180
182
  if 0 not in (a0, a1) and (a0, a1) != (1, 1):
181
183
  color_couples[i][2] = a0 / a1
182
184
 
183
- bx1, by1 = 0, 0
184
185
  if 'gradientTransform' in gradient.attrib:
186
+ bx2, by2 = bx1 + width, by1 + height
185
187
  bx1, by1 = transform_matrix.invert.transform_point(bx1, by1)
186
- bx2, by2 = transform_matrix.invert.transform_point(width, height)
188
+ bx2, by2 = transform_matrix.invert.transform_point(bx2, by2)
187
189
  width, height = bx2 - bx1, by2 - by1
188
190
 
189
191
  # Ensure that width and height are positive to please some PDF readers
@@ -457,7 +459,7 @@ def draw_pattern(svg, node, pattern, font_size, opacity, stroke):
457
459
  group = stream_pattern.add_group(0, 0, pattern_width, pattern_height)
458
460
  Pattern(pattern, svg).draw(
459
461
  group, pattern_width, pattern_height, svg.base_url,
460
- svg.url_fetcher, svg.context)
462
+ svg.context)
461
463
  stream_pattern.draw_x_object(group.id)
462
464
  svg.stream.set_color_space('Pattern', stroke=stroke)
463
465
  svg.stream.set_color_special(stream_pattern.id, stroke=stroke)
weasyprint/text/fonts.py CHANGED
@@ -167,9 +167,8 @@ class FontConfiguration:
167
167
 
168
168
  # Get font content.
169
169
  try:
170
- with fetch(url_fetcher, url) as result:
171
- string = 'string' in result
172
- font = result['string'] if string else result['file_obj'].read()
170
+ with fetch(url_fetcher, url) as response:
171
+ font = response.read()
173
172
  except Exception as exception:
174
173
  LOGGER.debug('Failed to load font at %r (%s)', url, exception)
175
174
  continue
weasyprint/urls.py CHANGED
@@ -6,11 +6,14 @@ import os.path
6
6
  import re
7
7
  import sys
8
8
  import traceback
9
+ import warnings
9
10
  import zlib
11
+ from email.message import EmailMessage
10
12
  from gzip import GzipFile
13
+ from io import BytesIO, StringIO
11
14
  from pathlib import Path
15
+ from urllib import request
12
16
  from urllib.parse import quote, unquote, urljoin, urlsplit
13
- from urllib.request import Request, pathname2url, url2pathname, urlopen
14
17
 
15
18
  from . import __version__
16
19
  from .logger import LOGGER
@@ -55,8 +58,7 @@ def iri_to_uri(url):
55
58
  # Data URIs can be huge, but don’t need this anyway.
56
59
  return url
57
60
  # Use UTF-8 as per RFC 3987 (IRI), except for file://
58
- url = url.encode(
59
- FILESYSTEM_ENCODING if url.startswith('file:') else 'utf-8')
61
+ url = url.encode(FILESYSTEM_ENCODING if url.startswith('file:') else 'utf-8')
60
62
  # This is a full URI, not just a component. Only %-encode characters
61
63
  # that are not allowed at all in URIs. Everything else is "safe":
62
64
  # * Reserved characters: /:?#[]@!$&'()*+,;=
@@ -85,7 +87,7 @@ def path2url(path):
85
87
  # Otherwise relative URIs are resolved from the parent directory.
86
88
  path += os.path.sep
87
89
  wants_trailing_slash = True
88
- path = pathname2url(path)
90
+ path = request.pathname2url(path)
89
91
  # On Windows pathname2url cuts off trailing slash
90
92
  if wants_trailing_slash and not path.endswith('/'):
91
93
  path += '/' # pragma: no cover
@@ -191,114 +193,288 @@ def default_url_fetcher(url, timeout=10, ssl_context=None, http_headers=None,
191
193
  allowed_protocols=None):
192
194
  """Fetch an external resource such as an image or stylesheet.
193
195
 
194
- Another callable with the same signature can be given as the
195
- ``url_fetcher`` argument to :class:`HTML` or :class:`CSS`.
196
- (See :ref:`URL Fetchers`.)
197
-
198
- :param str url:
199
- The URL of the resource to fetch.
200
- :param int timeout:
201
- The number of seconds before HTTP requests are dropped.
202
- :param ssl.SSLContext ssl_context:
203
- An SSL context used for HTTP requests.
204
- :param dict http_headers:
205
- Additional HTTP headers used for HTTP requests.
206
- :param set allowed_protocols:
207
- A set of authorized protocols.
208
- :raises: An exception indicating failure, e.g. :obj:`ValueError` on
209
- syntactically invalid URL.
210
- :returns: A :obj:`dict` with the following keys:
211
-
212
- * One of ``string`` (a :obj:`bytestring <bytes>`) or ``file_obj``
213
- (a :term:`file object`).
214
- * Optionally: ``mime_type``, a MIME type extracted e.g. from a
215
- *Content-Type* header. If not provided, the type is guessed from the
216
- file extension in the URL.
217
- * Optionally: ``encoding``, a character encoding extracted e.g. from a
218
- *charset* parameter in a *Content-Type* header
219
- * Optionally: ``redirected_url``, the actual URL of the resource
220
- if there were e.g. HTTP redirects.
221
- * Optionally: ``filename``, the filename of the resource. Usually
222
- derived from the *filename* parameter in a *Content-Disposition*
223
- header.
224
- * Optionally: ``path``, the path of the resource if it is stored on the
225
- local filesystem.
226
-
227
- If a ``file_obj`` key is given, it is the caller’s responsibility
228
- to call ``file_obj.close()``. The default function used internally to
229
- fetch data in WeasyPrint tries to close the file object after
230
- retreiving; but if this URL fetcher is used elsewhere, the file object
231
- has to be closed manually.
196
+ This function is deprecated, use ``URLFetcher`` instead.
232
197
 
233
198
  """
234
- if UNICODE_SCHEME_RE.match(url):
235
- if allowed_protocols is not None:
236
- if url.split('://', 1)[0].lower() not in allowed_protocols:
199
+ warnings.warn(
200
+ "default_url_fetcher is deprecated and will be removed in WeasyPrint 69.0, "
201
+ "please use URLFetcher instead. For security reasons, HTTP redirects are not "
202
+ "supported anymore with default_url_fetcher, but are with URLFetcher.\n\nSee "
203
+ "https://doc.courtbouillon.org/weasyprint/stable/first_steps.html#url-fetchers",
204
+ category=DeprecationWarning)
205
+ fetcher = URLFetcher(
206
+ timeout, ssl_context, http_headers, allowed_protocols, allow_redirects=False)
207
+ return fetcher.fetch(url)
208
+
209
+
210
+ @contextlib.contextmanager
211
+ def select_source(guess=None, filename=None, url=None, file_obj=None, string=None,
212
+ base_url=None, url_fetcher=None, check_css_mime_type=False):
213
+ """If only one input is given, return it.
214
+
215
+ Yield a file object, the base url, the protocol encoding and the protocol mime-type.
216
+
217
+ """
218
+ if base_url is not None:
219
+ base_url = ensure_url(base_url)
220
+ if url_fetcher is None:
221
+ url_fetcher = URLFetcher()
222
+
223
+ selected_params = [
224
+ param for param in (guess, filename, url, file_obj, string) if
225
+ param is not None]
226
+ if len(selected_params) != 1:
227
+ source = ', '.join(selected_params) or 'nothing'
228
+ raise TypeError(f'Expected exactly one source, got {source}')
229
+ elif guess is not None:
230
+ kwargs = {
231
+ 'base_url': base_url,
232
+ 'url_fetcher': url_fetcher,
233
+ 'check_css_mime_type': check_css_mime_type,
234
+ }
235
+ if hasattr(guess, 'read'):
236
+ kwargs['file_obj'] = guess
237
+ elif isinstance(guess, Path):
238
+ kwargs['filename'] = guess
239
+ elif url_is_absolute(guess):
240
+ kwargs['url'] = guess
241
+ else:
242
+ kwargs['filename'] = guess
243
+ result = select_source(**kwargs)
244
+ with result as result:
245
+ yield result
246
+ elif filename is not None:
247
+ if base_url is None:
248
+ base_url = path2url(filename)
249
+ with open(filename, 'rb') as file_obj:
250
+ yield file_obj, base_url, None, None
251
+ elif url is not None:
252
+ with fetch(url_fetcher, url) as response:
253
+ if check_css_mime_type and response.content_type != 'text/css':
254
+ LOGGER.error(
255
+ f'Unsupported stylesheet type {response.content_type} '
256
+ f'for {response.url}')
257
+ yield StringIO(''), base_url, None, None
258
+ else:
259
+ if base_url is None:
260
+ base_url = response.url
261
+ yield response, base_url, response.charset, response.content_type
262
+ elif file_obj is not None:
263
+ if base_url is None:
264
+ # filesystem file-like objects have a 'name' attribute.
265
+ name = getattr(file_obj, 'name', None)
266
+ # Some streams have a .name like '<stdin>', not a filename.
267
+ if name and not name.startswith('<'):
268
+ base_url = ensure_url(name)
269
+ yield file_obj, base_url, None, None
270
+ else:
271
+ if isinstance(string, str):
272
+ yield StringIO(string), base_url, None, None
273
+ else:
274
+ yield BytesIO(string), base_url, None, None
275
+
276
+
277
+ class URLFetchingError(IOError):
278
+ """Some error happened when fetching an URL."""
279
+
280
+
281
+ class FatalURLFetchingError(BaseException):
282
+ """Some error happened when fetching an URL and must stop the rendering."""
283
+
284
+
285
+ class URLFetcher(request.OpenerDirector):
286
+ """Fetcher of external resources such as images or stylesheets.
287
+
288
+ :param int timeout: The number of seconds before HTTP requests are dropped.
289
+ :param ssl.SSLContext ssl_context: An SSL context used for HTTPS requests.
290
+ :param dict http_headers: Additional HTTP headers used for HTTP requests.
291
+ :type allowed_protocols: :term:`sequence`
292
+ :param allowed_protocols: A set of authorized protocols, :obj:`None` means all.
293
+ :param bool allow_redirects: Whether HTTP redirects must be followed.
294
+ :param bool fail_on_errors: Whether HTTP errors should stop the rendering.
295
+
296
+ Another class inheriting from this class, with a ``fetch`` method that has a
297
+ compatible signature, can be given as the ``url_fetcher`` argument to
298
+ :class:`weasyprint.HTML` or :class:`weasyprint.CSS`.
299
+
300
+ See :ref:`URL Fetchers` for more information and examples.
301
+
302
+ """
303
+
304
+ def __init__(self, timeout=10, ssl_context=None, http_headers=None,
305
+ allowed_protocols=None, allow_redirects=True, fail_on_errors=False,
306
+ **kwargs):
307
+ super().__init__()
308
+ handlers = [
309
+ request.ProxyHandler(), request.UnknownHandler(), request.HTTPHandler(),
310
+ request.HTTPDefaultErrorHandler(), request.FTPHandler(),
311
+ request.FileHandler(), request.HTTPErrorProcessor(), request.DataHandler(),
312
+ request.HTTPSHandler(context=ssl_context)]
313
+ if allow_redirects:
314
+ handlers.append(request.HTTPRedirectHandler())
315
+ for handler in handlers:
316
+ self.add_handler(handler)
317
+
318
+ self._timeout = timeout
319
+ self._http_headers = {**HTTP_HEADERS, **(http_headers or {})}
320
+ self._allowed_protocols = allowed_protocols
321
+ self._fail_on_errors = fail_on_errors
322
+
323
+ def fetch(self, url, headers=None):
324
+ """Fetch a given URL.
325
+
326
+ :returns: A :obj:`URLFetcherResponse` instance.
327
+ :raises: An exception indicating failure, e.g. :obj:`ValueError` on
328
+ syntactically invalid URL. All exceptions are caught internally by
329
+ WeasyPrint, except when they inherit from :obj:`FatalURLFetchingError`.
330
+
331
+ """
332
+ # Discard URLs with no or invalid protocol.
333
+ if not UNICODE_SCHEME_RE.match(url): # pragma: no cover
334
+ raise ValueError(f'Not an absolute URI: {url}')
335
+
336
+ # Discard URLs with forbidden protocol.
337
+ if self._allowed_protocols is not None:
338
+ if url.split('://', 1)[0].lower() not in self._allowed_protocols:
237
339
  raise ValueError(f'URI uses disallowed protocol: {url}')
238
340
 
239
- # See https://bugs.python.org/issue34702
341
+ # Remove query and fragment parts from file URLs.
342
+ # See https://bugs.python.org/issue34702.
240
343
  if url.lower().startswith('file://'):
241
344
  url = url.split('?')[0]
242
- path = url2pathname(url.removeprefix('file:'))
243
- else:
244
- path = None
245
345
 
346
+ # Transform Unicode IRI to ASCII URI.
246
347
  url = iri_to_uri(url)
247
- if http_headers is not None:
248
- http_headers = {**HTTP_HEADERS, **http_headers}
348
+
349
+ # Open URL.
350
+ headers = {**self._http_headers, **(headers or {})}
351
+ http_request = request.Request(url, headers=headers)
352
+ response = super().open(http_request, timeout=self._timeout)
353
+
354
+ # Decompress response.
355
+ body = response
356
+ if 'Content-Encoding' in response.headers:
357
+ content_encoding = response.headers['Content-Encoding']
358
+ del response.headers['Content-Encoding']
359
+ if content_encoding == 'gzip':
360
+ body = StreamingGzipFile(fileobj=response)
361
+ elif content_encoding == 'deflate':
362
+ data = response.read()
363
+ try:
364
+ body = zlib.decompress(data)
365
+ except zlib.error:
366
+ # Try without zlib header or checksum.
367
+ body = zlib.decompress(data, -15)
368
+
369
+ return URLFetcherResponse(response.url, body, response.headers, response.status)
370
+
371
+ def open(self, url, data=None, timeout=None):
372
+ if isinstance(url, request.Request):
373
+ return self.fetch(url.full_url, url.headers)
374
+ return self.fetch(url)
375
+
376
+ def __call__(self, url):
377
+ return self.fetch(url)
378
+
379
+
380
+ class URLFetcherResponse:
381
+ """The HTTP response of an URL fetcher.
382
+
383
+ :param str url: The URL of the HTTP response.
384
+ :type body: :class:`str`, :class:`bytes` or :term:`file object`
385
+ :param body: The body of the HTTP response.
386
+ :type headers: dict or email.message.EmailMessage
387
+ :param headers: The headers of the HTTP response.
388
+ :param str status: The status of the HTTP response.
389
+
390
+ Has the same interface as :class:`urllib.response.addinfourl`.
391
+
392
+ If a :term:`file object` is given for the body, it is the caller’s responsibility to
393
+ call ``close()`` on it. The default function used internally to fetch data in
394
+ WeasyPrint tries to close the file object after retrieving; but if this URL fetcher
395
+ is used elsewhere, the file object has to be closed manually.
396
+
397
+ """
398
+ def __init__(self, url, body=None, headers=None, status='200 OK', **kwargs):
399
+ self.url = url
400
+ self.status = status
401
+
402
+ if isinstance(headers, EmailMessage):
403
+ self.headers = headers
249
404
  else:
250
- http_headers = HTTP_HEADERS
251
- response = urlopen(
252
- Request(url, headers=http_headers), timeout=timeout,
253
- context=ssl_context)
254
- result = {
255
- 'redirected_url': response.url,
256
- 'mime_type': response.headers.get_content_type(),
257
- 'encoding': response.headers.get_param('charset'),
258
- 'filename': response.headers.get_filename(),
259
- 'path': path,
260
- }
261
- content_encoding = response.headers.get('Content-Encoding')
262
- if content_encoding == 'gzip':
263
- result['file_obj'] = StreamingGzipFile(fileobj=response)
264
- elif content_encoding == 'deflate':
265
- data = response.read()
266
- try:
267
- result['string'] = zlib.decompress(data)
268
- except zlib.error:
269
- # Try without zlib header or checksum
270
- result['string'] = zlib.decompress(data, -15)
405
+ self.headers = EmailMessage()
406
+ for key, value in (headers or {}).items():
407
+ self.headers[key] = value
408
+
409
+ if hasattr(body, 'read'):
410
+ self._file_obj = body
411
+ elif isinstance(body, str):
412
+ self.headers.set_param('charset', 'utf-8')
413
+ self._file_obj = BytesIO(body.encode('utf-8'))
271
414
  else:
272
- result['file_obj'] = response
273
- return result
274
- else: # pragma: no cover
275
- raise ValueError(f'Not an absolute URI: {url}')
415
+ self._file_obj = BytesIO(body)
276
416
 
417
+ def read(self, *args, **kwargs):
418
+ return self._file_obj.read(*args, **kwargs)
277
419
 
278
- class URLFetchingError(IOError):
279
- """Some error happened when fetching an URL."""
420
+ def close(self):
421
+ try:
422
+ self._file_obj.close()
423
+ except Exception: # pragma: no cover
424
+ # May already be closed or something.
425
+ # This is just cleanup anyway: log but make it non-fatal.
426
+ LOGGER.warning(
427
+ 'Error when closing stream for %s:\n%s',
428
+ self.url, traceback.format_exc())
429
+
430
+ @property
431
+ def path(self):
432
+ if self.url.startswith('file:'):
433
+ return request.url2pathname(self.url.split('?')[0].removeprefix('file:'))
434
+
435
+ @property
436
+ def content_type(self):
437
+ return self.headers.get_content_type()
438
+
439
+ @property
440
+ def charset(self):
441
+ return self.headers.get_param('charset')
280
442
 
281
443
 
282
444
  @contextlib.contextmanager
283
445
  def fetch(url_fetcher, url):
284
- """Call an url_fetcher, fill in optional data, and clean up."""
446
+ """Fetch an ``url`` with ``url_fetcher``, fill in optional data, and clean up.
447
+
448
+ Fatal errors must raise a ``FatalURLFetchingError`` that stops the rendering. All
449
+ other exceptions are caught and raise an ``URLFetchingError``, that is usually
450
+ caught by the code that fetches the resource and emits a warning.
451
+
452
+ """
285
453
  try:
286
- result = url_fetcher(url)
454
+ resource = url_fetcher(url)
287
455
  except Exception as exception:
456
+ if getattr(url_fetcher, '_fail_on_errors', False):
457
+ raise FatalURLFetchingError(f'Error fetching "{url}"') from exception
288
458
  raise URLFetchingError(f'{type(exception).__name__}: {exception}')
289
- result.setdefault('redirected_url', url)
290
- result.setdefault('mime_type', None)
291
- if 'file_obj' in result:
292
- try:
293
- yield result
294
- finally:
295
- try:
296
- result['file_obj'].close()
297
- except Exception: # pragma: no cover
298
- # May already be closed or something.
299
- # This is just cleanup anyway: log but make it non-fatal.
300
- LOGGER.warning(
301
- 'Error when closing stream for %s:\n%s',
302
- url, traceback.format_exc())
303
- else:
304
- yield result
459
+
460
+ if isinstance(resource, dict):
461
+ warnings.warn(
462
+ "Returning dicts in URL fetchers is deprecated and will be removed "
463
+ "in WeasyPrint 69.0, please return URLFetcherResponse instead.",
464
+ category=DeprecationWarning)
465
+ if 'url' not in resource:
466
+ resource['url'] = resource.get('redirected_url', url)
467
+ resource['body'] = resource.get('file_obj', resource.get('string'))
468
+ content_type = resource.get('mime_type', 'application/octet-stream')
469
+ if charset := resource.get('encoding'):
470
+ content_type += f';{charset}'
471
+ resource['headers'] = {'Content-Type': content_type}
472
+ resource = URLFetcherResponse(**resource)
473
+
474
+ assert isinstance(resource, URLFetcherResponse), (
475
+ 'URL fetcher must return either a dict or a URLFetcherResponse instance')
476
+
477
+ try:
478
+ yield resource
479
+ finally:
480
+ resource.close()