PyPI - weasyprint - Versions diffs - 67.0__py3-none-any.whl → 68.0__py3-none-any.whl - Mend

weasyprint 67.0__py3-none-any.whl → 68.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

weasyprint/__init__.py +35 -103
weasyprint/__main__.py +107 -80
weasyprint/css/__init__.py +4 -10
weasyprint/css/functions.py +5 -0
weasyprint/css/html5_ua.css +1 -1
weasyprint/css/tokens.py +4 -1
weasyprint/css/validation/properties.py +4 -4
weasyprint/document.py +4 -64
weasyprint/draw/text.py +4 -2
weasyprint/formatting_structure/boxes.py +4 -1
weasyprint/formatting_structure/build.py +111 -37
weasyprint/images.py +27 -32
weasyprint/layout/__init__.py +2 -1
weasyprint/layout/grid.py +25 -14
weasyprint/layout/page.py +4 -4
weasyprint/layout/preferred.py +35 -2
weasyprint/pdf/__init__.py +12 -1
weasyprint/pdf/anchors.py +10 -16
weasyprint/pdf/fonts.py +12 -3
weasyprint/pdf/metadata.py +153 -98
weasyprint/pdf/pdfa.py +1 -3
weasyprint/pdf/pdfua.py +1 -3
weasyprint/pdf/pdfx.py +1 -3
weasyprint/pdf/stream.py +0 -2
weasyprint/svg/__init__.py +51 -30
weasyprint/svg/css.py +21 -4
weasyprint/svg/defs.py +5 -3
weasyprint/text/fonts.py +2 -3
weasyprint/urls.py +272 -96
{weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/METADATA +2 -1
{weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/RECORD +34 -34
{weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/WHEEL +0 -0
{weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/entry_points.txt +0 -0
{weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/licenses/LICENSE +0 -0

weasyprint/pdf/stream.py CHANGED Viewed

@@ -201,8 +201,6 @@ class Stream(pydyf.Stream):
             'Group': pydyf.Dictionary({
                 'Type': '/Group',
                 'S': '/Transparency',
-                'I': 'true',
-                'CS': '/DeviceRGB',
             }),
         })
         group = self.clone(resources=resources, extra=extra)

weasyprint/svg/__init__.py CHANGED Viewed

@@ -155,6 +155,14 @@ class Node:
                 for name, value in declarations:
                     child.attrib[name] = value.strip()
+        # Expand
+        # TODO: simplified expanders, use CSS expander code instead.
+        if font := child.attrib.pop('font', None):
+            parts = font.strip().split(maxsplit=1)
+            if len(parts) == 2:
+                child.attrib['font-size'] = parts[0]
+                child.attrib['font-family'] = parts[1]
         # Replace 'currentColor' value
         for key in COLOR_ATTRIBUTES:
             if child.get(key) == 'currentColor':
@@ -219,6 +227,8 @@ class Node:
     def get_child(self, id_):
         """Get a child with given id in the whole child tree."""
+        if self._etree_node.find(f'.//*[@id="{id_}"]') is None:
+            return
         for child in self:
             if child.get('id') == id_:
                 return child
@@ -324,23 +334,52 @@ class Node:
         svg.inner_diagonal = hypot(svg.inner_width, svg.inner_height) / sqrt(2)
+class LazyDefs:
+    def __init__(self, name, svg):
+        self._name = name
+        self._svg = svg
+        self._data = {}
+    def __getitem__(self, name):
+        return self.get(name)
+    def get(self, name):
+        if not name:
+            return
+        if name in self._data:
+            return self._data[name]
+        node = self._svg.tree.get_child(name)
+        if node is not None and self._name in node.tag.lower():
+            self._data[name] = node
+            if self._name in ('gradient', 'pattern'):
+                self._svg.inherit_element(node, self)
+        else:
+            self._data[name] = None
+        return self._data[name]
+    def __contains__(self, name):
+        return self.get(name)
 class SVG:
     """An SVG document."""
-    def __init__(self, tree, url, font_config):
+    def __init__(self, tree, url, font_config, url_fetcher=None):
         wrapper = ElementWrapper.from_xml_root(tree)
-        style = parse_stylesheets(wrapper, url)
+        style = parse_stylesheets(wrapper, url, font_config, url_fetcher)
         self.tree = Node(wrapper, style)
         self.font_config = font_config
+        self.url_fetcher = url_fetcher
         self.url = url
-        self.filters = {}
-        self.gradients = {}
-        self.images = {}
-        self.markers = {}
-        self.masks = {}
-        self.patterns = {}
-        self.paths = {}
-        self.symbols = {}
+        self.filters = LazyDefs('filter', self)
+        self.gradients = LazyDefs('gradient', self)
+        self.images = LazyDefs('image', self)
+        self.markers = LazyDefs('marker', self)
+        self.masks = LazyDefs('mask', self)
+        self.patterns = LazyDefs('pattern', self)
+        self.paths = LazyDefs('path', self)
+        self.symbols = LazyDefs('symbol', self)
         self.use_cache = {}
@@ -349,8 +388,6 @@ class SVG:
         self.text_path_width = 0
         self.tree.cascade(self.tree)
-        self.parse_defs(self.tree)
-        self.inherit_defs()
     def get_intrinsic_size(self, font_size):
         """Get intrinsic size of the image."""
@@ -382,15 +419,13 @@ class SVG:
         """Compute size of an arbirtary attribute."""
         return size(length, font_size, self.inner_diagonal)
-    def draw(self, stream, concrete_width, concrete_height, base_url,
-             url_fetcher, context):
+    def draw(self, stream, concrete_width, concrete_height, base_url, context):
         """Draw image on a stream."""
         self.stream = stream
         self.tree.set_svg_size(self, concrete_width, concrete_height)
         self.base_url = base_url
-        self.url_fetcher = url_fetcher
         self.context = context
         self.draw_node(self.tree, size('12pt'))
@@ -796,20 +831,6 @@ class SVG:
         if matrix.determinant:
             self.stream.transform(*matrix.values)
-    def parse_defs(self, node):
-        """Parse defs included in a tree."""
-        for def_type in DEF_TYPES:
-            if def_type in node.tag.lower() and 'id' in node.attrib:
-                getattr(self, f'{def_type}s')[node.attrib['id']] = node
-        for child in node:
-            self.parse_defs(child)
-    def inherit_defs(self):
-        """Handle inheritance of different defined elements lists."""
-        for defs in (self.gradients, self.patterns):
-            for element in defs.values():
-                self.inherit_element(element, defs)
     def inherit_element(self, element, defs):
         """Recursively handle inheritance of defined element."""
         href = element.get_href(self.url)
@@ -840,7 +861,7 @@ class SVG:
 class Pattern(SVG):
     """SVG node applied as a pattern."""
     def __init__(self, tree, svg):
-        super().__init__(tree._etree_node, svg.url, svg.font_config)
+        super().__init__(tree._etree_node, svg.url, svg.font_config, svg.url_fetcher)
         self.svg = svg
         self.tree = tree

weasyprint/svg/css.py CHANGED Viewed

@@ -5,11 +5,12 @@ from urllib.parse import urljoin
 import cssselect2
 import tinycss2
+from ..css.validation.descriptors import preprocess_descriptors
 from ..logger import LOGGER
 from .utils import parse_url
-def find_stylesheets_rules(tree, stylesheet_rules, url):
+def find_stylesheets_rules(tree, stylesheet_rules, url, font_config, url_fetcher):
     """Find rules among stylesheet rules and imports."""
     for rule in stylesheet_rules:
         if rule.type == 'at-rule':
@@ -22,7 +23,22 @@ def find_stylesheets_rules(tree, stylesheet_rules, url):
                 stylesheet = tinycss2.parse_stylesheet(
                     tree.fetch_url(css_url, 'text/css').decode())
                 url = css_url.geturl()
-                yield from find_stylesheets_rules(tree, stylesheet, url)
+                yield from find_stylesheets_rules(
+                    tree, stylesheet, url, font_config, url_fetcher)
+            elif rule.lower_at_keyword == 'font-face':
+                if font_config is not None and url_fetcher is not None:
+                    content = tinycss2.parse_blocks_contents(rule.content)
+                    rule_descriptors = dict(
+                        preprocess_descriptors('font-face', url, content))
+                    for key in ('src', 'font_family'):
+                        if key not in rule_descriptors:
+                            LOGGER.warning(
+                                "Missing %s descriptor in '@font-face' rule at "
+                                "%d:%d", key.replace('_', '-'),
+                                rule.source_line, rule.source_column)
+                            break
+                    else:
+                        font_config.add_font_face(rule_descriptors, url_fetcher)
             # TODO: support media types
             # if rule.lower_at_keyword == 'media':
         elif rule.type == 'qualified-rule':
@@ -49,7 +65,7 @@ def parse_declarations(input):
     return normal_declarations, important_declarations
-def parse_stylesheets(tree, url):
+def parse_stylesheets(tree, url, font_config, url_fetcher):
     """Find stylesheets and return rule matchers in given tree."""
     normal_matcher = cssselect2.Matcher()
     important_matcher = cssselect2.Matcher()
@@ -70,7 +86,8 @@ def parse_stylesheets(tree, url):
     # Parse rules and fill matchers
     for stylesheet in stylesheets:
-        for rule in find_stylesheets_rules(tree, stylesheet, url):
+        for rule in find_stylesheets_rules(
+                tree, stylesheet, url, font_config, url_fetcher):
             normal_declarations, important_declarations = parse_declarations(
                 rule.content)
             try:

weasyprint/svg/defs.py CHANGED Viewed

@@ -102,10 +102,12 @@ def draw_gradient(svg, node, gradient, font_size, opacity, stroke):
         return False
     if gradient.get('gradientUnits') == 'userSpaceOnUse':
         width, height = svg.inner_width, svg.inner_height
+        bx1, by1 = bounding_box[:2]
         matrix = Matrix()
     else:
         width, height = 1, 1
         e, f, a, d = bounding_box
+        bx1, by1 = 0, 0
         matrix = Matrix(a=a, d=d, e=e, f=f)
     spread = gradient.get('spreadMethod', 'pad')
@@ -180,10 +182,10 @@ def draw_gradient(svg, node, gradient, font_size, opacity, stroke):
         if 0 not in (a0, a1) and (a0, a1) != (1, 1):
             color_couples[i][2] = a0 / a1
-    bx1, by1 = 0, 0
     if 'gradientTransform' in gradient.attrib:
+        bx2, by2 = bx1 + width, by1 + height
         bx1, by1 = transform_matrix.invert.transform_point(bx1, by1)
-        bx2, by2 = transform_matrix.invert.transform_point(width, height)
+        bx2, by2 = transform_matrix.invert.transform_point(bx2, by2)
         width, height = bx2 - bx1, by2 - by1
         # Ensure that width and height are positive to please some PDF readers
@@ -457,7 +459,7 @@ def draw_pattern(svg, node, pattern, font_size, opacity, stroke):
     group = stream_pattern.add_group(0, 0, pattern_width, pattern_height)
     Pattern(pattern, svg).draw(
         group, pattern_width, pattern_height, svg.base_url,
-        svg.url_fetcher, svg.context)
+        svg.context)
     stream_pattern.draw_x_object(group.id)
     svg.stream.set_color_space('Pattern', stroke=stroke)
     svg.stream.set_color_special(stream_pattern.id, stroke=stroke)

weasyprint/text/fonts.py CHANGED Viewed

@@ -167,9 +167,8 @@ class FontConfiguration:
             # Get font content.
             try:
-                with fetch(url_fetcher, url) as result:
-                    string = 'string' in result
-                    font = result['string'] if string else result['file_obj'].read()
+                with fetch(url_fetcher, url) as response:
+                    font = response.read()
             except Exception as exception:
                 LOGGER.debug('Failed to load font at %r (%s)', url, exception)
                 continue

weasyprint/urls.py CHANGED Viewed

@@ -6,11 +6,14 @@ import os.path
 import re
 import sys
 import traceback
+import warnings
 import zlib
+from email.message import EmailMessage
 from gzip import GzipFile
+from io import BytesIO, StringIO
 from pathlib import Path
+from urllib import request
 from urllib.parse import quote, unquote, urljoin, urlsplit
-from urllib.request import Request, pathname2url, url2pathname, urlopen
 from . import __version__
 from .logger import LOGGER
@@ -55,8 +58,7 @@ def iri_to_uri(url):
         # Data URIs can be huge, but don’t need this anyway.
         return url
     # Use UTF-8 as per RFC 3987 (IRI), except for file://
-    url = url.encode(
-        FILESYSTEM_ENCODING if url.startswith('file:') else 'utf-8')
+    url = url.encode(FILESYSTEM_ENCODING if url.startswith('file:') else 'utf-8')
     # This is a full URI, not just a component. Only %-encode characters
     # that are not allowed at all in URIs. Everthing else is "safe":
     # * Reserved characters: /:?#[]@!$&'()*+,;=
@@ -85,7 +87,7 @@ def path2url(path):
         # Otherwise relative URIs are resolved from the parent directory.
         path += os.path.sep
         wants_trailing_slash = True
-    path = pathname2url(path)
+    path = request.pathname2url(path)
     # On Windows pathname2url cuts off trailing slash
     if wants_trailing_slash and not path.endswith('/'):
         path += '/'  # pragma: no cover
@@ -191,114 +193,288 @@ def default_url_fetcher(url, timeout=10, ssl_context=None, http_headers=None,
                         allowed_protocols=None):
     """Fetch an external resource such as an image or stylesheet.
-    Another callable with the same signature can be given as the
-    ``url_fetcher`` argument to :class:`HTML` or :class:`CSS`.
-    (See :ref:`URL Fetchers`.)
-    :param str url:
-        The URL of the resource to fetch.
-    :param int timeout:
-        The number of seconds before HTTP requests are dropped.
-    :param ssl.SSLContext ssl_context:
-        An SSL context used for HTTP requests.
-    :param dict http_headers:
-        Additional HTTP headers used for HTTP requests.
-    :param set allowed_protocols:
-        A set of authorized protocols.
-    :raises: An exception indicating failure, e.g. :obj:`ValueError` on
-        syntactically invalid URL.
-    :returns: A :obj:`dict` with the following keys:
-        * One of ``string`` (a :obj:`bytestring <bytes>`) or ``file_obj``
-          (a :term:`file object`).
-        * Optionally: ``mime_type``, a MIME type extracted e.g. from a
-          *Content-Type* header. If not provided, the type is guessed from the
-          file extension in the URL.
-        * Optionally: ``encoding``, a character encoding extracted e.g. from a
-          *charset* parameter in a *Content-Type* header
-        * Optionally: ``redirected_url``, the actual URL of the resource
-          if there were e.g. HTTP redirects.
-        * Optionally: ``filename``, the filename of the resource. Usually
-          derived from the *filename* parameter in a *Content-Disposition*
-          header.
-        * Optionally: ``path``, the path of the resource if it is stored on the
-          local filesystem.
-        If a ``file_obj`` key is given, it is the caller’s responsibility
-        to call ``file_obj.close()``. The default function used internally to
-        fetch data in WeasyPrint tries to close the file object after
-        retreiving; but if this URL fetcher is used elsewhere, the file object
-        has to be closed manually.
+    This function is deprecated, use ``URLFetcher`` instead.
     """
-    if UNICODE_SCHEME_RE.match(url):
-        if allowed_protocols is not None:
-            if url.split('://', 1)[0].lower() not in allowed_protocols:
+    warnings.warn(
+        "default_url_fetcher is deprecated and will be removed in WeasyPrint 69.0, "
+        "please use URLFetcher instead. For security reasons, HTTP redirects are not "
+        "supported anymore with default_url_fetcher, but are with URLFetcher.\n\nSee "
+        "https://doc.courtbouillon.org/weasyprint/stable/first_steps.html#url-fetchers",
+        category=DeprecationWarning)
+    fetcher = URLFetcher(
+        timeout, ssl_context, http_headers, allowed_protocols, allow_redirects=False)
+    return fetcher.fetch(url)
+@contextlib.contextmanager
+def select_source(guess=None, filename=None, url=None, file_obj=None, string=None,
+                  base_url=None, url_fetcher=None, check_css_mime_type=False):
+    """If only one input is given, return it.
+    Yield a file object, the base url, the protocol encoding and the protocol mime-type.
+    """
+    if base_url is not None:
+        base_url = ensure_url(base_url)
+    if url_fetcher is None:
+        url_fetcher = URLFetcher()
+    selected_params = [
+        param for param in (guess, filename, url, file_obj, string) if
+        param is not None]
+    if len(selected_params) != 1:
+        source = ', '.join(selected_params) or 'nothing'
+        raise TypeError(f'Expected exactly one source, got {source}')
+    elif guess is not None:
+        kwargs = {
+            'base_url': base_url,
+            'url_fetcher': url_fetcher,
+            'check_css_mime_type': check_css_mime_type,
+        }
+        if hasattr(guess, 'read'):
+            kwargs['file_obj'] = guess
+        elif isinstance(guess, Path):
+            kwargs['filename'] = guess
+        elif url_is_absolute(guess):
+            kwargs['url'] = guess
+        else:
+            kwargs['filename'] = guess
+        result = select_source(**kwargs)
+        with result as result:
+            yield result
+    elif filename is not None:
+        if base_url is None:
+            base_url = path2url(filename)
+        with open(filename, 'rb') as file_obj:
+            yield file_obj, base_url, None, None
+    elif url is not None:
+        with fetch(url_fetcher, url) as response:
+            if check_css_mime_type and response.content_type != 'text/css':
+                LOGGER.error(
+                    f'Unsupported stylesheet type {response.content_type} '
+                    f'for {response.url}')
+                yield StringIO(''), base_url, None, None
+            else:
+                if base_url is None:
+                    base_url = response.url
+                yield response, base_url, response.charset, response.content_type
+    elif file_obj is not None:
+        if base_url is None:
+            # filesystem file-like objects have a 'name' attribute.
+            name = getattr(file_obj, 'name', None)
+            # Some streams have a .name like '<stdin>', not a filename.
+            if name and not name.startswith('<'):
+                base_url = ensure_url(name)
+        yield file_obj, base_url, None, None
+    else:
+        if isinstance(string, str):
+            yield StringIO(string), base_url, None, None
+        else:
+            yield BytesIO(string), base_url, None, None
+class URLFetchingError(IOError):
+    """Some error happened when fetching an URL."""
+class FatalURLFetchingError(BaseException):
+    """Some error happened when fetching an URL and must stop the rendering."""
+class URLFetcher(request.OpenerDirector):
+    """Fetcher of external resources such as images or stylesheets.
+    :param int timeout: The number of seconds before HTTP requests are dropped.
+    :param ssl.SSLContext ssl_context: An SSL context used for HTTPS requests.
+    :param dict http_headers: Additional HTTP headers used for HTTP requests.
+    :type allowed_protocols: :term:`sequence`
+    :param allowed_protocols: A set of authorized protocols, :obj:`None` means all.
+    :param bool allow_redirects: Whether HTTP redirects must be followed.
+    :param bool fail_on_errors: Whether HTTP errors should stop the rendering.
+    Another class inheriting from this class, with a ``fetch`` method that has a
+    compatible signature, can be given as the ``url_fetcher`` argument to
+    :class:`weasyprint.HTML` or :class:`weasyprint.CSS`.
+    See :ref:`URL Fetchers` for more information and examples.
+    """
+    def __init__(self, timeout=10, ssl_context=None, http_headers=None,
+                 allowed_protocols=None, allow_redirects=True, fail_on_errors=False,
+                 **kwargs):
+        super().__init__()
+        handlers = [
+            request.ProxyHandler(), request.UnknownHandler(), request.HTTPHandler(),
+            request.HTTPDefaultErrorHandler(), request.FTPHandler(),
+            request.FileHandler(), request.HTTPErrorProcessor(), request.DataHandler(),
+            request.HTTPSHandler(context=ssl_context)]
+        if allow_redirects:
+            handlers.append(request.HTTPRedirectHandler())
+        for handler in handlers:
+            self.add_handler(handler)
+        self._timeout = timeout
+        self._http_headers = {**HTTP_HEADERS, **(http_headers or {})}
+        self._allowed_protocols = allowed_protocols
+        self._fail_on_errors = fail_on_errors
+    def fetch(self, url, headers=None):
+        """Fetch a given URL.
+        :returns: A :obj:`URLFetcherResponse` instance.
+        :raises: An exception indicating failure, e.g. :obj:`ValueError` on
+            syntactically invalid URL. All exceptions are catched internally by
+            WeasyPrint, except when they inherit from :obj:`FatalURLFetchingError`.
+        """
+        # Discard URLs with no or invalid protocol.
+        if not UNICODE_SCHEME_RE.match(url):  # pragma: no cover
+            raise ValueError(f'Not an absolute URI: {url}')
+        # Discard URLs with forbidden protocol.
+        if self._allowed_protocols is not None:
+            if url.split('://', 1)[0].lower() not in self._allowed_protocols:
                 raise ValueError(f'URI uses disallowed protocol: {url}')
-        # See https://bugs.python.org/issue34702
+        # Remove query and fragment parts from file URLs.
+        # See https://bugs.python.org/issue34702.
         if url.lower().startswith('file://'):
             url = url.split('?')[0]
-            path = url2pathname(url.removeprefix('file:'))
-        else:
-            path = None
+        # Transform Unicode IRI to ASCII URI.
         url = iri_to_uri(url)
-        if http_headers is not None:
-            http_headers = {**HTTP_HEADERS, **http_headers}
+        # Open URL.
+        headers = {**self._http_headers, **(headers or {})}
+        http_request = request.Request(url, headers=headers)
+        response = super().open(http_request, timeout=self._timeout)
+        # Decompress response.
+        body = response
+        if 'Content-Encoding' in response.headers:
+            content_encoding = response.headers['Content-Encoding']
+            del response.headers['Content-Encoding']
+            if content_encoding == 'gzip':
+                body = StreamingGzipFile(fileobj=response)
+            elif content_encoding == 'deflate':
+                data = response.read()
+                try:
+                    body = zlib.decompress(data)
+                except zlib.error:
+                    # Try without zlib header or checksum.
+                    body = zlib.decompress(data, -15)
+        return URLFetcherResponse(response.url, body, response.headers, response.status)
+    def open(self, url, data=None, timeout=None):
+        if isinstance(url, request.Request):
+            return self.fetch(url.full_url, url.headers)
+        return self.fetch(url)
+    def __call__(self, url):
+        return self.fetch(url)
+class URLFetcherResponse:
+    """The HTTP response of an URL fetcher.
+    :param str url: The URL of the HTTP response.
+    :type body: :class:`str`, :class:`bytes` or :term:`file object`
+    :param body: The body of the HTTP response.
+    :type headers: dict or email.message.EmailMessage
+    :param headers: The headers of the HTTP response.
+    :param str status: The status of the HTTP response.
+    Has the same interface as :class:`urllib.response.addinfourl`.
+    If a :term:`file object` is given for the body, it is the caller’s responsibility to
+    call ``close()`` on it. The default function used internally to fetch data in
+    WeasyPrint tries to close the file object after retreiving; but if this URL fetcher
+    is used elsewhere, the file object has to be closed manually.
+    """
+    def __init__(self, url, body=None, headers=None, status='200 OK', **kwargs):
+        self.url = url
+        self.status = status
+        if isinstance(headers, EmailMessage):
+            self.headers = headers
         else:
-            http_headers = HTTP_HEADERS
-        response = urlopen(
-            Request(url, headers=http_headers), timeout=timeout,
-            context=ssl_context)
-        result = {
-            'redirected_url': response.url,
-            'mime_type': response.headers.get_content_type(),
-            'encoding': response.headers.get_param('charset'),
-            'filename': response.headers.get_filename(),
-            'path': path,
-        }
-        content_encoding = response.headers.get('Content-Encoding')
-        if content_encoding == 'gzip':
-            result['file_obj'] = StreamingGzipFile(fileobj=response)
-        elif content_encoding == 'deflate':
-            data = response.read()
-            try:
-                result['string'] = zlib.decompress(data)
-            except zlib.error:
-                # Try without zlib header or checksum
-                result['string'] = zlib.decompress(data, -15)
+            self.headers = EmailMessage()
+            for key, value in (headers or {}).items():
+                self.headers[key] = value
+        if hasattr(body, 'read'):
+            self._file_obj = body
+        elif isinstance(body, str):
+            self.headers.set_param('charset', 'utf-8')
+            self._file_obj = BytesIO(body.encode('utf-8'))
         else:
-            result['file_obj'] = response
-        return result
-    else:  # pragma: no cover
-        raise ValueError(f'Not an absolute URI: {url}')
+            self._file_obj = BytesIO(body)
+    def read(self, *args, **kwargs):
+        return self._file_obj.read(*args, **kwargs)
-class URLFetchingError(IOError):
-    """Some error happened when fetching an URL."""
+    def close(self):
+        try:
+            self._file_obj.close()
+        except Exception:  # pragma: no cover
+            # May already be closed or something.
+            # This is just cleanup anyway: log but make it non-fatal.
+            LOGGER.warning(
+                'Error when closing stream for %s:\n%s',
+                self.url, traceback.format_exc())
+    @property
+    def path(self):
+        if self.url.startswith('file:'):
+            return request.url2pathname(self.url.split('?')[0].removeprefix('file:'))
+    @property
+    def content_type(self):
+        return self.headers.get_content_type()
+    @property
+    def charset(self):
+        return self.headers.get_param('charset')
 @contextlib.contextmanager
 def fetch(url_fetcher, url):
-    """Call an url_fetcher, fill in optional data, and clean up."""
+    """Fetch an ``url`` with ```url_fetcher``, fill in optional data, and clean up.
+    Fatal errors must raise a ``FatalURLFetchingError`` that stops the rendering. All
+    other exceptions are catched and raise an ``URLFetchingError``, that is usually
+    catched by the code that fetches the resource and emits a warning.
+    """
     try:
-        result = url_fetcher(url)
+        resource = url_fetcher(url)
     except Exception as exception:
+        if getattr(url_fetcher, '_fail_on_errors', False):
+            raise FatalURLFetchingError(f'Error fetching "{url}"') from exception
         raise URLFetchingError(f'{type(exception).__name__}: {exception}')
-    result.setdefault('redirected_url', url)
-    result.setdefault('mime_type', None)
-    if 'file_obj' in result:
-        try:
-            yield result
-        finally:
-            try:
-                result['file_obj'].close()
-            except Exception:  # pragma: no cover
-                # May already be closed or something.
-                # This is just cleanup anyway: log but make it non-fatal.
-                LOGGER.warning(
-                    'Error when closing stream for %s:\n%s',
-                    url, traceback.format_exc())
-    else:
-        yield result
+    if isinstance(resource, dict):
+        warnings.warn(
+            "Returning dicts in URL fetchers is deprecated and will be removed "
+            "in WeasyPrint 69.0, please return URLFetcherResponse instead.",
+            category=DeprecationWarning)
+        if 'url' not in resource:
+            resource['url'] = resource.get('redirected_url', url)
+        resource['body'] = resource.get('file_obj', resource.get('string'))
+        content_type = resource.get('mime_type', 'application/octet-stream')
+        if charset := resource.get('encoding'):
+            content_type += f';{charset}'
+        resource['headers'] = {'Content-Type': content_type}
+        resource = URLFetcherResponse(**resource)
+    assert isinstance(resource, URLFetcherResponse), (
+        'URL fetcher must return either a dict or a URLFetcherResponse instance')
+    try:
+        yield resource
+    finally:
+        resource.close()

weasyprint 67.0__py3-none-any.whl → 68.0__py3-none-any.whl

weasyprint 67.0__py3-none-any.whl → 68.0__py3-none-any.whl