PyPI - weasyprint - Versions diffs - 65.1__py3-none-any.whl → 67.0__py3-none-any.whl - Mend

weasyprint 65.1__py3-none-any.whl → 67.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

weasyprint/__init__.py +17 -7
weasyprint/__main__.py +21 -10
weasyprint/anchors.py +4 -4
weasyprint/css/__init__.py +732 -67
weasyprint/css/computed_values.py +65 -170
weasyprint/css/counters.py +1 -1
weasyprint/css/functions.py +206 -0
weasyprint/css/html5_ua.css +3 -7
weasyprint/css/html5_ua_form.css +2 -2
weasyprint/css/media_queries.py +3 -1
weasyprint/css/properties.py +6 -2
weasyprint/css/{utils.py → tokens.py} +306 -397
weasyprint/css/units.py +91 -0
weasyprint/css/validation/__init__.py +1 -1
weasyprint/css/validation/descriptors.py +47 -19
weasyprint/css/validation/expanders.py +7 -8
weasyprint/css/validation/properties.py +341 -357
weasyprint/document.py +20 -19
weasyprint/draw/__init__.py +56 -63
weasyprint/draw/border.py +121 -69
weasyprint/draw/color.py +1 -1
weasyprint/draw/text.py +60 -41
weasyprint/formatting_structure/boxes.py +24 -5
weasyprint/formatting_structure/build.py +33 -45
weasyprint/images.py +76 -62
weasyprint/layout/__init__.py +32 -26
weasyprint/layout/absolute.py +7 -6
weasyprint/layout/background.py +7 -7
weasyprint/layout/block.py +195 -152
weasyprint/layout/column.py +19 -24
weasyprint/layout/flex.py +54 -26
weasyprint/layout/float.py +12 -7
weasyprint/layout/grid.py +284 -90
weasyprint/layout/inline.py +121 -68
weasyprint/layout/page.py +45 -12
weasyprint/layout/percent.py +14 -10
weasyprint/layout/preferred.py +105 -63
weasyprint/layout/replaced.py +9 -6
weasyprint/layout/table.py +16 -9
weasyprint/pdf/__init__.py +58 -18
weasyprint/pdf/anchors.py +3 -4
weasyprint/pdf/fonts.py +126 -69
weasyprint/pdf/metadata.py +36 -4
weasyprint/pdf/pdfa.py +19 -3
weasyprint/pdf/pdfua.py +7 -115
weasyprint/pdf/pdfx.py +83 -0
weasyprint/pdf/stream.py +57 -49
weasyprint/pdf/tags.py +307 -0
weasyprint/stacking.py +14 -15
weasyprint/svg/__init__.py +59 -32
weasyprint/svg/bounding_box.py +4 -2
weasyprint/svg/defs.py +4 -9
weasyprint/svg/images.py +11 -3
weasyprint/svg/text.py +11 -2
weasyprint/svg/utils.py +15 -8
weasyprint/text/constants.py +1 -1
weasyprint/text/ffi.py +4 -3
weasyprint/text/fonts.py +13 -5
weasyprint/text/line_break.py +146 -43
weasyprint/urls.py +41 -13
{weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/METADATA +5 -6
weasyprint-67.0.dist-info/RECORD +77 -0
weasyprint/draw/stack.py +0 -13
weasyprint-65.1.dist-info/RECORD +0 -74
{weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/WHEEL +0 -0
{weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/entry_points.txt +0 -0
{weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/licenses/LICENSE +0 -0

weasyprint/pdf/pdfua.py CHANGED Viewed

@@ -1,125 +1,17 @@
 """PDF/UA generation."""
-import pydyf
+from functools import partial
 from .metadata import add_metadata
-def pdfua(pdf, metadata, document, page_streams, attachments, compress):
+def pdfua(pdf, metadata, document, page_streams, attachments, compress, version):
     """Set metadata for PDF/UA documents."""
-    # Structure for PDF tagging
-    content_mapping = pydyf.Dictionary({})
-    pdf.add_object(content_mapping)
-    structure_root = pydyf.Dictionary({
-        'Type': '/StructTreeRoot',
-        'ParentTree': content_mapping.reference,
-    })
-    pdf.add_object(structure_root)
-    structure_document = pydyf.Dictionary({
-        'Type': '/StructElem',
-        'S': '/Document',
-        'P': structure_root.reference,
-    })
-    pdf.add_object(structure_document)
-    structure_root['K'] = pydyf.Array([structure_document.reference])
-    pdf.catalog['StructTreeRoot'] = structure_root.reference
-    document_children = []
-    content_mapping['Nums'] = pydyf.Array()
-    links = []
-    for page_number, page_stream in enumerate(page_streams):
-        structure = {}
-        document.build_element_structure(structure)
-        parents = [None] * len(page_stream.marked)
-        for mcid, (key, box) in enumerate(page_stream.marked):
-            # Build structure elements
-            kids = [mcid]
-            if key == 'Link':
-                object_reference = pydyf.Dictionary({
-                    'Type': '/OBJR',
-                    'Obj': box.link_annotation.reference,
-                    'Pg': pdf.page_references[page_number],
-                })
-                pdf.add_object(object_reference)
-                links.append((object_reference.reference, box.link_annotation))
-            etree_element = box.element
-            child_structure_data_element = None
-            while True:
-                if etree_element is None:
-                    structure_data = structure.setdefault(
-                        box, {'parent': None})
-                else:
-                    structure_data = structure[etree_element]
-                new_element = 'element' not in structure_data
-                if new_element:
-                    child = structure_data['element'] = pydyf.Dictionary({
-                        'Type': '/StructElem',
-                        'S': f'/{key}',
-                        'K': pydyf.Array(kids),
-                        'Pg': pdf.page_references[page_number],
-                    })
-                    pdf.add_object(child)
-                    if key == 'LI':
-                        if etree_element.tag == 'dt':
-                            sub_key = 'Lbl'
-                        else:
-                            sub_key = 'LBody'
-                        real_child = pydyf.Dictionary({
-                            'Type': '/StructElem',
-                            'S': f'/{sub_key}',
-                            'K': pydyf.Array(kids),
-                            'Pg': pdf.page_references[page_number],
-                            'P': child.reference,
-                        })
-                        pdf.add_object(real_child)
-                        for kid in kids:
-                            if isinstance(kid, int):
-                                parents[kid] = real_child.reference
-                        child['K'] = pydyf.Array([real_child.reference])
-                        structure_data['element'] = real_child
-                    else:
-                        for kid in kids:
-                            if isinstance(kid, int):
-                                parents[kid] = child.reference
-                else:
-                    child = structure_data['element']
-                    child['K'].extend(kids)
-                    for kid in kids:
-                        if isinstance(kid, int):
-                            parents[kid] = child.reference
-                kid = child.reference
-                if child_structure_data_element is not None:
-                    child_structure_data_element['P'] = kid
-                if not new_element:
-                    break
-                kids = [kid]
-                child_structure_data_element = child
-                if structure_data['parent'] is None:
-                    child['P'] = structure_document.reference
-                    document_children.append(child.reference)
-                    break
-                else:
-                    etree_element = structure_data['parent']
-                key = page_stream.get_marked_content_tag(etree_element.tag)
-        content_mapping['Nums'].append(page_number)
-        content_mapping['Nums'].append(pydyf.Array(parents))
-    structure_document['K'] = pydyf.Array(document_children)
-    for i, (link, annotation) in enumerate(links, start=page_number + 1):
-        content_mapping['Nums'].append(i)
-        content_mapping['Nums'].append(link)
-        annotation['StructParent'] = i
-        annotation['F'] = 2 ** (2 - 1)
     # Common PDF metadata stream
-    add_metadata(pdf, metadata, 'ua', 1, conformance=None, compress=compress)
-    # PDF document extra metadata
-    if 'Lang' not in pdf.catalog:
-        pdf.catalog['Lang'] = pydyf.String()
-    pdf.catalog['ViewerPreferences'] = pydyf.Dictionary({
-        'DisplayDocTitle': 'true',
-    })
-    pdf.catalog['MarkInfo'] = pydyf.Dictionary({'Marked': 'true'})
+    add_metadata(pdf, metadata, 'ua', version, conformance=None, compress=compress)
-VARIANTS = {'pdf/ua-1': (pdfua, {'mark': True})}
+VARIANTS = {
+    'pdf/ua-1': (partial(pdfua, version=1), {'version': '1.7', 'pdf_tags': True}),
+    'pdf/ua-2': (partial(pdfua, version=2), {'version': '2.0', 'pdf_tags': True}),
+}

weasyprint/pdf/pdfx.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""PDF/X generation."""
+from functools import partial
+from time import localtime
+import pydyf
+from .metadata import add_metadata
+def pdfx(pdf, metadata, document, page_streams, attachments, compress, version,
+         variant):
+    """Set metadata for PDF/X documents."""
+    # Add conformance metadata.
+    conformance = f'PDF/X-{version}{variant}'
+    if version < 4:
+        pdf.info['GTS_PDFXVersion'] = pydyf.String(conformance)
+        pdf.info['GTS_PDFXConformance'] = pydyf.String(conformance)
+    pdf.info['Trapped'] = '/False'
+    now = localtime()
+    year, month, day, hour, minute, second = now[:6]
+    tz_hour, tz_minute = divmod(now.tm_gmtoff, 3600)
+    now_iso = (
+        f'{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}'
+        f'{tz_hour:+03}:{tz_minute:02}')
+    now_pdf = (
+        f'(D:{year:04}{month:02}{day:02}{hour:02}{minute:02}{second:02}'
+        f"{tz_hour:+03}'{tz_minute:02}')")
+    if not metadata.modified:
+        metadata.modified = now_iso
+        pdf.info['ModDate'] = now_pdf
+    if not metadata.created:
+        metadata.created = now_iso
+        pdf.info['CreationDate'] = now_pdf
+    # Add output intents.
+    if 'device-cmyk' not in document.color_profiles:
+        # Add standard CMYK profile.
+        pdf.catalog['OutputIntents'] = pydyf.Array([
+            pydyf.Dictionary({
+                'Type': '/OutputIntent',
+                'S': '/GTS_PDFX',
+                'OutputConditionIdentifier': pydyf.String('CGATS TR 001'),
+                'RegistryName': pydyf.String('http://www.color.org'),
+            }),
+        ])
+    # Common PDF metadata stream.
+    add_metadata(pdf, metadata, 'x', version, conformance, compress=compress)
+VARIANTS = {
+    'pdf/x-1a': (
+        partial(pdfx, version=1, variant='a:2003'),
+        {'version': '1.4', 'identifier': True},
+    ),
+    'pdf/x-3': (
+        partial(pdfx, version=3, variant=':2003'),
+        {'version': '1.4', 'identifier': True},
+    ),
+    'pdf/x-4': (
+        partial(pdfx, version=4, variant=''),
+        {'version': '1.6', 'identifier': True},
+    ),
+    'pdf/x-5g': (
+        partial(pdfx, version=5, variant='g'),
+        {'version': '1.6', 'identifier': True},
+    ),
+    # TODO: these variants forbid OutputIntent to include ICC file.
+    # 'pdf/x-4p': (
+    #     partial(pdfx, version=4, variant='p'),
+    #     {'version': '1.6', 'identifier': True},
+    # ),
+    # 'pdf/x-5pg': (
+    #     partial(pdfx, version=5, variant='pg'),
+    #     {'version': '1.6', 'identifier': True},
+    # ),
+    # 'pdf/x-5n': (
+    #     partial(pdfx, version=5, variant='n'),
+    #     {'version': '1.6', 'identifier': True},
+    # ),
+}

weasyprint/pdf/stream.py CHANGED Viewed

@@ -1,5 +1,7 @@
 """PDF stream."""
+from contextlib import contextmanager
 import pydyf
 from ..logger import LOGGER
@@ -11,14 +13,15 @@ from .fonts import Font
 class Stream(pydyf.Stream):
     """PDF stream object with extra features."""
-    def __init__(self, fonts, page_rectangle, resources, images, mark, *args, **kwargs):
+    def __init__(self, fonts, page_rectangle, resources, images, tags, color_profiles,
+                 *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.page_rectangle = page_rectangle
-        self.marked = []
         self._fonts = fonts
         self._resources = resources
         self._images = images
-        self._mark = mark
+        self._tags = tags
+        self._color_profiles = color_profiles
         self._current_color = self._current_color_stroke = None
         self._current_alpha = self._current_alpha_stroke = None
         self._current_font = self._current_font_size = None
@@ -39,8 +42,10 @@ class Stream(pydyf.Stream):
             kwargs['resources'] = self._resources
         if 'images' not in kwargs:
             kwargs['images'] = self._images
-        if 'mark' not in kwargs:
-            kwargs['mark'] = self._mark
+        if 'tags' not in kwargs:
+            kwargs['tags'] = self._tags
+        if 'color_profiles' not in kwargs:
+            kwargs['color_profiles'] = self._color_profiles
         if 'compress' not in kwargs:
             kwargs['compress'] = self.compress
         return Stream(**kwargs)
@@ -104,8 +109,21 @@ class Stream(pydyf.Stream):
             self.set_color_space('lab-d50', stroke)
             lightness, a, b = color.to('lab').coordinates
             self.set_color_special(None, stroke, lightness, a, b)
+        elif color.space == 'device-cmyk':
+            self.set_color_space('DeviceCMYK', stroke)
+            c, m, y, k = color.coordinates
+            self.set_color_special(None, stroke, c, m, y, k)
+        elif color.space.startswith('--') and color.space in self._color_profiles:
+            self.set_color_space(color.space, stroke)
+            self.set_color_special(None, stroke, *color.coordinates)
         else:
-            LOGGER.warn('Unsupported color space %s, use sRGB instead', color.space)
+            LOGGER.warning('Unsupported color space %s, use sRGB instead', color.space)
+            if len(channels) > 3:
+                channels = channels[:3]
+            elif len(channels) == 2:
+                channels = *channels, 0
+            elif len(channels) == 1:
+                channels = *channels, 0, 0
             self.set_color_rgb(*channels, stroke)
     def set_font_size(self, font, size):
@@ -248,21 +266,39 @@ class Stream(pydyf.Stream):
         self._resources['Shading'][shading.id] = shading
         return shading
-    def begin_marked_content(self, box, mcid=False, tag=None):
-        if not self._mark:
-            return
-        property_list = None
-        if tag is None:
-            tag = self.get_marked_content_tag(box.element_tag)
-        if mcid:
-            property_list = pydyf.Dictionary({'MCID': len(self.marked)})
-            self.marked.append((tag, box))
-        super().begin_marked_content(tag, property_list)
-    def end_marked_content(self):
-        if not self._mark:
-            return
-        super().end_marked_content()
+    @contextmanager
+    def stacked(self):
+        """Save and restore stream context when used with the ``with`` keyword."""
+        self.push_state()
+        try:
+            yield
+        finally:
+            self.pop_state()
+    @contextmanager
+    def marked(self, box, tag):
+        if self._tags is not None:
+            property_list = None
+            mcid = len(self._tags)
+            assert box not in self._tags
+            self._tags[box] = {'tag': tag, 'mcid': mcid}
+            property_list = pydyf.Dictionary({'MCID': mcid})
+            super().begin_marked_content(tag, property_list)
+        try:
+            yield
+        finally:
+            if self._tags is not None:
+                super().end_marked_content()
+    @contextmanager
+    def artifact(self):
+        if self._tags is not None:
+            super().begin_marked_content('Artifact')
+        try:
+            yield
+        finally:
+            if self._tags is not None:
+                super().end_marked_content()
     @staticmethod
     def create_interpolation_function(domain, c0, c1, n):
@@ -283,31 +319,3 @@ class Stream(pydyf.Stream):
             'Bounds': pydyf.Array(bounds),
             'Functions': pydyf.Array(sub_functions),
         })
-    def get_marked_content_tag(self, element_tag):
-        if element_tag == 'div':
-            return 'Div'
-        elif element_tag == 'span':
-            return 'Span'
-        elif element_tag == 'article':
-            return 'Art'
-        elif element_tag == 'section':
-            return 'Sect'
-        elif element_tag == 'blockquote':
-            return 'BlockQuote'
-        elif element_tag == 'p':
-            return 'P'
-        elif element_tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
-            return element_tag.upper()
-        elif element_tag in ('dl', 'ul', 'ol'):
-            return 'L'
-        elif element_tag in ('li', 'dt', 'dd'):
-            return 'LI'
-        elif element_tag == 'table':
-            return 'Table'
-        elif element_tag in ('tr', 'th', 'td'):
-            return element_tag.upper()
-        elif element_tag in ('thead', 'tbody', 'tfoot'):
-            return element_tag[:2].upper() + element_tag[2:]
-        else:
-            return 'NonStruct'

weasyprint/pdf/tags.py ADDED Viewed

@@ -0,0 +1,307 @@
+"""PDF tagging."""
+from collections import defaultdict
+import pydyf
+from ..formatting_structure import boxes
+from ..layout.absolute import AbsolutePlaceholder
+from ..logger import LOGGER
+def add_tags(pdf, document, page_streams):
+    """Add tag tree to the document."""
+    # Add root structure.
+    content_mapping = pydyf.Dictionary({})
+    pdf.add_object(content_mapping)
+    structure_root = pydyf.Dictionary({
+        'Type': '/StructTreeRoot',
+        'ParentTree': content_mapping.reference,
+    })
+    pdf.add_object(structure_root)
+    structure_document = pydyf.Dictionary({
+        'Type': '/StructElem',
+        'S': '/Document',
+        'K': pydyf.Array(),
+        'P': structure_root.reference,
+    })
+    pdf.add_object(structure_document)
+    structure_root['K'] = pydyf.Array([structure_document.reference])
+    pdf.catalog['StructTreeRoot'] = structure_root.reference
+    # Map content.
+    content_mapping['Nums'] = pydyf.Array()
+    links = []
+    for page_number, (page, stream) in enumerate(zip(document.pages, page_streams)):
+        tags = stream._tags
+        page_box = page._page_box
+        # Prepare array for this page’s MCID-to-StructElem mapping.
+        content_mapping['Nums'].append(page_number)
+        content_mapping['Nums'].append(pydyf.Array())
+        page_nums = {}
+        # Map page box content.
+        elements = _build_box_tree(
+            page_box, structure_document, pdf, page_number, page_nums, links, tags)
+        for element in elements:
+            structure_document['K'].append(element.reference)
+        assert not tags
+        # Flatten page-local nums into global mapping.
+        sorted_refs = [ref for _, ref in sorted(page_nums.items())]
+        content_mapping['Nums'][-1].extend(sorted_refs)
+    # Add annotations for links.
+    for i, (link_reference, annotation) in enumerate(links, start=len(document.pages)):
+        content_mapping['Nums'].append(i)
+        content_mapping['Nums'].append(link_reference)
+        annotation['StructParent'] = i
+    # Add required metadata.
+    pdf.catalog['ViewerPreferences'] = pydyf.Dictionary({'DisplayDocTitle': 'true'})
+    pdf.catalog['MarkInfo'] = pydyf.Dictionary({'Marked': 'true'})
+    if 'Lang' not in pdf.catalog:
+        LOGGER.error('Missing required "lang" attribute at the root of the document')
+        pdf.catalog['Lang'] = pydyf.String()
+def _get_pdf_tag(tag):
+    """Get PDF tag corresponding to HTML tag."""
+    if tag is None:
+        return 'NonStruct'
+    elif tag == 'div':
+        return 'Div'
+    elif tag.split(':')[0] == 'a':
+        # Links and link pseudo elements create link annotations.
+        return 'Link'
+    elif tag == 'span':
+        return 'Span'
+    elif tag == 'main':
+        return 'Part'
+    elif tag == 'article':
+        return 'Art'
+    elif tag == 'section':
+        return 'Sect'
+    elif tag == 'blockquote':
+        return 'BlockQuote'
+    elif tag == 'p':
+        return 'P'
+    elif tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
+        return tag.upper()
+    elif tag in ('dl', 'ul', 'ol'):
+        return 'L'
+    elif tag in ('li', 'dt', 'dd'):
+        # TODO: dt should be different.
+        return 'LI'
+    elif tag == 'li::marker':
+        return 'Lbl'
+    elif tag == 'table':
+        return 'Table'
+    elif tag in ('tr', 'th', 'td'):
+        return tag.upper()
+    elif tag in ('thead', 'tbody', 'tfoot'):
+        return tag[:2].upper() + tag[2:]
+    elif tag == 'img':
+        return 'Figure'
+    elif tag in ('caption', 'figcaption'):
+        return 'Caption'
+    else:
+        return 'NonStruct'
+def _build_box_tree(box, parent, pdf, page_number, nums, links, tags):
+    """Recursively build tag tree for given box and yield children."""
+    # Special case for absolute elements.
+    if isinstance(box, AbsolutePlaceholder):
+        box = box._box
+    element_tag = None if box.element is None else box.element_tag
+    tag = _get_pdf_tag(element_tag)
+    # Special case for html, body, page boxes and margin boxes.
+    if element_tag in ('html', 'body') or isinstance(box, boxes.PageBox):
+        # Avoid generate page, html and body boxes as a semantic node, yield children.
+        if isinstance(box, boxes.ParentBox) and not isinstance(box, boxes.LineBox):
+            for child in box.children:
+                yield from _build_box_tree(
+                    child, parent, pdf, page_number, nums, links, tags)
+            return
+    elif isinstance(box, boxes.MarginBox):
+        # Build tree for margin boxes but don’t link it to main tree. It ensures that
+        # marked content is mapped in document and removed from list. It could be
+        # included in tree as Artifact, but that’s only allowed in PDF 2.0.
+        for child in box.children:
+            tuple(_build_box_tree(child, parent, pdf, page_number, nums, links, tags))
+        return
+    # Create box element.
+    if tag == 'LI':
+        anonymous_list_element = parent['S'] == '/LI'
+        anonymous_li_child = parent['S'] == '/LBody'
+        dl_item = box.element_tag in ('dt', 'dd')
+        no_bullet_li = box.element_tag == 'li' and (
+            'list-item' not in box.style['display'] or
+            box.style['list_style_type'] == 'none')
+        if anonymous_list_element:
+            # Store as list item body.
+            tag = 'LBody'
+        elif anonymous_li_child:
+            # Store as non struct list item body child.
+            tag = 'NonStruct'
+        elif dl_item or no_bullet_li:
+            # Wrap in list item.
+            tag = 'LBody'
+            parent = pydyf.Dictionary({
+                'Type': '/StructElem',
+                'S': '/LI',
+                'K': pydyf.Array([]),
+                'Pg': pdf.page_references[page_number],
+                'P': parent.reference,
+            })
+            pdf.add_object(parent)
+            children = _build_box_tree(box, parent, pdf, page_number, nums, links, tags)
+            for child in children:
+                parent['K'].append(child.reference)
+            yield parent
+            return
+    element = pydyf.Dictionary({
+        'Type': '/StructElem',
+        'S': f'/{tag}',
+        'K': pydyf.Array([]),
+        'Pg': pdf.page_references[page_number],
+        'P': parent.reference,
+    })
+    pdf.add_object(element)
+    # Handle special cases.
+    if tag == 'Figure':
+        # Add extra data for images.
+        x1, y1 = box.content_box_x(), box.content_box_y()
+        x2, y2 = x1 + box.width, y1 + box.height
+        element['A'] = pydyf.Dictionary({
+            'O': '/Layout',
+            'BBox': pydyf.Array((x1, y1, x2, y2)),
+        })
+        if alt := box.element.attrib.get('alt'):
+            element['Alt'] = pydyf.String(alt)
+        else:
+            source = box.element.attrib.get('src', 'unknown')
+            LOGGER.error(f'Image "{source}" has no required alt description')
+    elif tag == 'Table':
+        # Use wrapped table as tagged box, and put captions in it.
+        if box.is_table_wrapper:
+            # Can be false if table has another display type.
+            wrapper, table = box, box.get_wrapped_table()
+            box = table.copy_with_children([])
+            for child in wrapper.children:
+                box.children.extend(child.children if child is table else [child])
+    elif tag == 'TH':
+        # Set identifier for table headers to reference them in cells.
+        element['ID'] = pydyf.String(id(box))
+    elif tag == 'TD':
+        # Store table cell element to map it to headers later.
+        # TODO: don’t use the box to store this.
+        box.mark = element
+    # Include link annotations.
+    if box.link_annotation:
+        annotation = box.link_annotation
+        object_reference = pydyf.Dictionary({
+            'Type': '/OBJR',
+            'Obj': annotation.reference,
+            'Pg': pdf.page_references[page_number],
+        })
+        pdf.add_object(object_reference)
+        links.append((element.reference, annotation))
+        element['K'].append(object_reference.reference)
+    if isinstance(box, boxes.ParentBox):
+        # Build tree for box children.
+        for child in box.children:
+            children = child.children if isinstance(child, boxes.LineBox) else [child]
+            for child in children:
+                if isinstance(child, boxes.TextBox):
+                    # Add marked element from the stream.
+                    kid = tags.pop(child)
+                    assert kid['mcid'] not in nums
+                    if tag == 'Link':
+                        # Associate MCID directly with link reference.
+                        element['K'].append(kid['mcid'])
+                        nums[kid['mcid']] = element.reference
+                    else:
+                        kid_element = pydyf.Dictionary({
+                            'Type': '/StructElem',
+                            'S': f'/{kid["tag"]}',
+                            'K': pydyf.Array([kid['mcid']]),
+                            'Pg': pdf.page_references[page_number],
+                            'P': element.reference,
+                        })
+                        pdf.add_object(kid_element)
+                        element['K'].append(kid_element.reference)
+                        nums[kid['mcid']] = kid_element.reference
+                else:
+                    # Recursively build tree for child.
+                    if child.element_tag in ('ul', 'ol') and element['S'] == '/LI':
+                        # In PDFs, nested lists are linked to the parent list, but in
+                        # HTML, nested lists are linked to a parent’s list item.
+                        child_parent = parent
+                    else:
+                        child_parent = element
+                    child_elements = _build_box_tree(
+                        child, child_parent, pdf, page_number, nums, links, tags)
+                    # Check if it is already been referenced before.
+                    for child_element in child_elements:
+                        child_parent['K'].append(child_element.reference)
+    else:
+        # Add replaced box.
+        assert isinstance(box, boxes.ReplacedBox)
+        kid = tags.pop(box)
+        element['K'].append(kid['mcid'])
+        assert kid['mcid'] not in nums
+        nums[kid['mcid']] = element.reference
+    # Link table cells to related headers.
+    if tag == 'Table':
+        def _get_rows(table_box):
+            for child in table_box.children:
+                if child.element_tag == 'tr':
+                    yield child
+                else:
+                    yield from _get_rows(child)
+        # Get headers and rows.
+        column_headers = defaultdict(list)
+        row_headers = defaultdict(list)
+        rows = tuple(_get_rows(box))
+        # Find column and row headers.
+        # TODO: handle rowspan and colspan values.
+        for i, row in enumerate(rows):
+            for j, cell in enumerate(row.children):
+                if cell.element is None:
+                    continue
+                if cell.element_tag == 'th':
+                    # TODO: handle rowgroup and colgroup values.
+                    if cell.element.attrib.get('scope') == 'row':
+                        row_headers[i].append(pydyf.String(id(cell)))
+                    else:
+                        column_headers[j].append(pydyf.String(id(cell)))
+        # Map headers to cells.
+        for i, row in enumerate(rows):
+            for j, cell in enumerate(row.children):
+                if cell.element is None:
+                    continue
+                if cell.element_tag == 'td':
+                    cell.mark['A'] = pydyf.Dictionary({
+                        'O': '/Table',
+                        'Headers': pydyf.Array(row_headers[i] + column_headers[j]),
+                    })
+    yield element

weasyprint 65.1__py3-none-any.whl → 67.0__py3-none-any.whl

weasyprint 65.1__py3-none-any.whl → 67.0__py3-none-any.whl