weasyprint 67.0__py3-none-any.whl → 68.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weasyprint/__init__.py +35 -103
- weasyprint/__main__.py +107 -80
- weasyprint/css/__init__.py +4 -10
- weasyprint/css/functions.py +5 -0
- weasyprint/css/html5_ua.css +1 -1
- weasyprint/css/tokens.py +4 -1
- weasyprint/css/validation/properties.py +4 -4
- weasyprint/document.py +4 -64
- weasyprint/draw/text.py +4 -2
- weasyprint/formatting_structure/boxes.py +4 -1
- weasyprint/formatting_structure/build.py +111 -37
- weasyprint/images.py +27 -32
- weasyprint/layout/__init__.py +2 -1
- weasyprint/layout/grid.py +25 -14
- weasyprint/layout/page.py +4 -4
- weasyprint/layout/preferred.py +35 -2
- weasyprint/pdf/__init__.py +12 -1
- weasyprint/pdf/anchors.py +10 -16
- weasyprint/pdf/fonts.py +12 -3
- weasyprint/pdf/metadata.py +153 -98
- weasyprint/pdf/pdfa.py +1 -3
- weasyprint/pdf/pdfua.py +1 -3
- weasyprint/pdf/pdfx.py +1 -3
- weasyprint/pdf/stream.py +0 -2
- weasyprint/svg/__init__.py +51 -30
- weasyprint/svg/css.py +21 -4
- weasyprint/svg/defs.py +5 -3
- weasyprint/text/fonts.py +2 -3
- weasyprint/urls.py +272 -96
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/METADATA +2 -1
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/RECORD +34 -34
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/WHEEL +0 -0
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/entry_points.txt +0 -0
- {weasyprint-67.0.dist-info → weasyprint-68.0.dist-info}/licenses/LICENSE +0 -0
weasyprint/layout/grid.py
CHANGED
|
@@ -1123,6 +1123,7 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1123
1123
|
# Find resume_at row.
|
|
1124
1124
|
this_page_children = []
|
|
1125
1125
|
resume_row = None
|
|
1126
|
+
extra_skip_height = 0
|
|
1126
1127
|
if skip_stack:
|
|
1127
1128
|
from .block import block_level_layout
|
|
1128
1129
|
first_skip_row = min(skip_stack)
|
|
@@ -1130,7 +1131,6 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1130
1131
|
skip_height = (
|
|
1131
1132
|
sum(size for size, _ in rows_sizes[:last_skip_row]) +
|
|
1132
1133
|
(len(rows_sizes[:last_skip_row]) - 1) * row_gap)
|
|
1133
|
-
extra_skip_height = 0
|
|
1134
1134
|
for child, (x, y, width, height) in children_positions.items():
|
|
1135
1135
|
if (advancement := box.advancements.get((x, y))) is None:
|
|
1136
1136
|
continue
|
|
@@ -1271,13 +1271,16 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1271
1271
|
if child.margin_left == 'auto':
|
|
1272
1272
|
child.margin_left = 0
|
|
1273
1273
|
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1274
|
+
child_border_width = width - (child.margin_left + child.margin_right)
|
|
1275
|
+
child_content_width = child_border_width - (
|
|
1276
|
+
child.border_left_width + child.padding_left +
|
|
1277
|
+
child.border_right_width + child.padding_right)
|
|
1278
|
+
child_border_height = height - child.margin_bottom
|
|
1279
|
+
child_content_height = child_border_height - (
|
|
1280
|
+
child.border_bottom_width + child.padding_bottom)
|
|
1279
1281
|
if not child_skip_stack or child.style['box_decoration_break'] == 'clone':
|
|
1280
|
-
|
|
1282
|
+
child_border_height -= child.margin_top
|
|
1283
|
+
child_content_height -= (
|
|
1281
1284
|
child.margin_top + child.border_top_width + child.padding_top)
|
|
1282
1285
|
|
|
1283
1286
|
justify_self = set(child.style['justify_self'])
|
|
@@ -1286,6 +1289,9 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1286
1289
|
if justify_self & {'normal', 'stretch'}:
|
|
1287
1290
|
if child.style['width'] == 'auto':
|
|
1288
1291
|
child.style = child.style.copy()
|
|
1292
|
+
child_width = (
|
|
1293
|
+
child_content_width if child.style['box_sizing'] == 'content-box'
|
|
1294
|
+
else child_border_width)
|
|
1289
1295
|
child.style['width'] = Dimension(child_width, 'px')
|
|
1290
1296
|
align_self = set(child.style['align_self'])
|
|
1291
1297
|
if align_self & {'auto'}:
|
|
@@ -1293,6 +1299,9 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1293
1299
|
if align_self & {'normal', 'stretch'}:
|
|
1294
1300
|
if child.style['height'] == 'auto':
|
|
1295
1301
|
child.style = child.style.copy()
|
|
1302
|
+
child_height = (
|
|
1303
|
+
child_content_height if child.style['box_sizing'] == 'content-box'
|
|
1304
|
+
else child_border_height)
|
|
1296
1305
|
child.style['height'] = Dimension(child_height, 'px')
|
|
1297
1306
|
|
|
1298
1307
|
# TODO: Find a better solution for the layout.
|
|
@@ -1338,11 +1347,11 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1338
1347
|
continue
|
|
1339
1348
|
|
|
1340
1349
|
if justify_self & {'normal', 'stretch'}:
|
|
1341
|
-
new_child.width = max(
|
|
1350
|
+
new_child.width = max(child_content_width, new_child.width)
|
|
1342
1351
|
else:
|
|
1343
1352
|
if new_child.style['width'] == 'auto':
|
|
1344
1353
|
new_child.width = max_content_width(context, new_child, outer=False)
|
|
1345
|
-
diff =
|
|
1354
|
+
diff = child_content_width - new_child.width
|
|
1346
1355
|
if justify_self & {'center'}:
|
|
1347
1356
|
new_child.translate(diff / 2, 0)
|
|
1348
1357
|
elif justify_self & {'right', 'end', 'flex-end', 'self-end'}:
|
|
@@ -1350,9 +1359,9 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1350
1359
|
|
|
1351
1360
|
# TODO: Apply auto margins.
|
|
1352
1361
|
if align_self & {'normal', 'stretch'}:
|
|
1353
|
-
new_child.height = max(
|
|
1362
|
+
new_child.height = max(child_content_height, new_child.height)
|
|
1354
1363
|
else:
|
|
1355
|
-
diff =
|
|
1364
|
+
diff = child_content_height - new_child.height
|
|
1356
1365
|
if align_self & {'center'}:
|
|
1357
1366
|
new_child.translate(0, diff / 2)
|
|
1358
1367
|
elif align_self & {'end', 'flex-end', 'self-end'}:
|
|
@@ -1368,8 +1377,9 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1368
1377
|
context.finish_block_formatting_context(box)
|
|
1369
1378
|
return None, None, {'break': 'any', 'page': None}, [], False
|
|
1370
1379
|
|
|
1371
|
-
old_advancements = box.advancements
|
|
1372
|
-
advancements = box.advancements
|
|
1380
|
+
old_advancements = box.advancements.copy()
|
|
1381
|
+
advancements = box.advancements
|
|
1382
|
+
advancements.clear()
|
|
1373
1383
|
box = box.copy_with_children(new_children)
|
|
1374
1384
|
if isinstance(box, boxes.InlineGridBox):
|
|
1375
1385
|
# TODO: Synthetize a real baseline value.
|
|
@@ -1420,7 +1430,8 @@ def grid_layout(context, box, bottom_space, skip_stack, containing_block,
|
|
|
1420
1430
|
else:
|
|
1421
1431
|
# Child fully drawn, save the extra height added to reach the bottom of
|
|
1422
1432
|
# the page to substract it from the advancements.
|
|
1423
|
-
extra_advancement = max(
|
|
1433
|
+
extra_advancement = max(
|
|
1434
|
+
extra_advancement, child.height - child_content_height)
|
|
1424
1435
|
|
|
1425
1436
|
# Substract the extra height added to reach the bottom of the page from all the
|
|
1426
1437
|
# advancements.
|
weasyprint/layout/page.py
CHANGED
|
@@ -389,7 +389,7 @@ def make_margin_boxes(context, page, state):
|
|
|
389
389
|
if box.is_generated:
|
|
390
390
|
# @margins mustn't manipulate page-context counters
|
|
391
391
|
margin_state = copy.deepcopy(state)
|
|
392
|
-
quote_depth, counter_values, counter_scopes = margin_state
|
|
392
|
+
quote_depth, counter_values, counter_scopes, _page_groups = margin_state
|
|
393
393
|
# TODO: check this, probably useless
|
|
394
394
|
counter_scopes.append(set())
|
|
395
395
|
build.update_counters(margin_state, box.style)
|
|
@@ -901,7 +901,7 @@ def _update_page_groups(page_groups, resume_at, next_page, root_box, blank):
|
|
|
901
901
|
return next_page['page']
|
|
902
902
|
|
|
903
903
|
|
|
904
|
-
def remake_page(index,
|
|
904
|
+
def remake_page(index, context, root_box, html):
|
|
905
905
|
"""Return one laid out page without margin boxes.
|
|
906
906
|
|
|
907
907
|
Start with the initial values from ``context.page_maker[index]``.
|
|
@@ -932,6 +932,7 @@ def remake_page(index, page_groups, context, root_box, html):
|
|
|
932
932
|
(next_page_side == 'right' and not right_page) or
|
|
933
933
|
(context.reported_footnotes and resume_at is None))
|
|
934
934
|
side = 'right' if right_page else 'left'
|
|
935
|
+
page_groups = page_state[3]
|
|
935
936
|
name = _update_page_groups(page_groups, resume_at, next_page, root_box, blank)
|
|
936
937
|
groups = tuple((name, index) for name, index, _ in page_groups)
|
|
937
938
|
page_type = PageType(side, blank, name, index, groups)
|
|
@@ -990,7 +991,6 @@ def make_all_pages(context, root_box, html, pages):
|
|
|
990
991
|
"""
|
|
991
992
|
i = 0
|
|
992
993
|
reported_footnotes = None
|
|
993
|
-
page_groups = []
|
|
994
994
|
while True:
|
|
995
995
|
remake_state = context.page_maker[i][-1]
|
|
996
996
|
if (len(pages) == 0 or
|
|
@@ -1002,7 +1002,7 @@ def make_all_pages(context, root_box, html, pages):
|
|
|
1002
1002
|
remake_state['pages_wanted'] = False
|
|
1003
1003
|
remake_state['anchors'] = []
|
|
1004
1004
|
remake_state['content_lookups'] = []
|
|
1005
|
-
page, resume_at = remake_page(i,
|
|
1005
|
+
page, resume_at = remake_page(i, context, root_box, html)
|
|
1006
1006
|
reported_footnotes = context.reported_footnotes
|
|
1007
1007
|
yield page
|
|
1008
1008
|
else:
|
weasyprint/layout/preferred.py
CHANGED
|
@@ -101,9 +101,32 @@ def _block_content_width(context, box, function, outer):
|
|
|
101
101
|
function(context, child, outer=True) for child in box.children
|
|
102
102
|
if not child.is_absolutely_positioned()]
|
|
103
103
|
width = max(children_widths) if children_widths else 0
|
|
104
|
+
elif box.style['box_sizing'] == 'content-box':
|
|
105
|
+
width = width.value
|
|
104
106
|
else:
|
|
105
|
-
assert width.unit.lower() == 'px'
|
|
106
107
|
width = width.value
|
|
108
|
+
percentages = 0
|
|
109
|
+
|
|
110
|
+
for value in ('padding_left', 'padding_right'):
|
|
111
|
+
style_value = box.style[value]
|
|
112
|
+
if style_value != 'auto':
|
|
113
|
+
if style_value.unit.lower() == 'px':
|
|
114
|
+
width -= style_value.value
|
|
115
|
+
else:
|
|
116
|
+
assert style_value.unit == '%'
|
|
117
|
+
percentages += style_value.value
|
|
118
|
+
|
|
119
|
+
# Same as margin_width().
|
|
120
|
+
collapse = box.style['border_collapse'] == 'collapse'
|
|
121
|
+
if collapse and hasattr(box, 'border_left_width'):
|
|
122
|
+
width -= box.border_left_width
|
|
123
|
+
else:
|
|
124
|
+
width -= box.style['border_left_width']
|
|
125
|
+
if collapse and hasattr(box, 'border_right_width'):
|
|
126
|
+
width -= box.border_right_width
|
|
127
|
+
else:
|
|
128
|
+
width -= box.style['border_right_width']
|
|
129
|
+
width = (100 - min(100, percentages)) * max(0, width) / 100
|
|
107
130
|
|
|
108
131
|
return adjust(box, outer, width)
|
|
109
132
|
|
|
@@ -362,7 +385,17 @@ def inline_line_widths(context, box, outer, is_line_start, minimum, skip_stack=N
|
|
|
362
385
|
# "By default, there is a break opportunity
|
|
363
386
|
# both before and after any inline object."
|
|
364
387
|
if minimum:
|
|
365
|
-
|
|
388
|
+
# "For soft wrap opportunities defined by the boundary between two
|
|
389
|
+
# characters or atomic inlines, the white-space property on the nearest
|
|
390
|
+
# common ancestor of the two characters controls breaking; which
|
|
391
|
+
# elements’ line-break, word-break, and overflow-wrap properties control
|
|
392
|
+
# the determination of soft wrap opportunities at such boundaries is
|
|
393
|
+
# undefined in this level." We choose to always follow the parent’s
|
|
394
|
+
# value here, other parts of the line-breaking algorithm do the same.
|
|
395
|
+
if box.style['white_space'] in ('normal', 'pre-wrap', 'pre-line'):
|
|
396
|
+
lines = [None, min_content_width(context, child), None]
|
|
397
|
+
else:
|
|
398
|
+
lines = [min_content_width(context, child)]
|
|
366
399
|
else:
|
|
367
400
|
lines = [max_content_width(context, child)]
|
|
368
401
|
# The first text line goes on the current line.
|
weasyprint/pdf/__init__.py
CHANGED
|
@@ -9,6 +9,7 @@ from .. import VERSION, Attachment
|
|
|
9
9
|
from ..html import W3C_DATE_RE
|
|
10
10
|
from ..logger import LOGGER, PROGRESS_LOGGER
|
|
11
11
|
from ..matrix import Matrix
|
|
12
|
+
from ..urls import select_source
|
|
12
13
|
from . import debug, pdfa, pdfua, pdfx
|
|
13
14
|
from .fonts import build_fonts_dictionary
|
|
14
15
|
from .stream import Stream
|
|
@@ -273,14 +274,24 @@ def generate_pdf(document, target, zoom, **options):
|
|
|
273
274
|
key = key.encode('ascii', errors='ignore').decode()
|
|
274
275
|
if key:
|
|
275
276
|
pdf.info[key] = pydyf.String(value)
|
|
277
|
+
if options['xmp_metadata']:
|
|
278
|
+
for url in options['xmp_metadata']:
|
|
279
|
+
result = select_source(url)
|
|
280
|
+
with result as (file_obj, base_url, charset, _):
|
|
281
|
+
xmp_metadata = file_obj.read()
|
|
282
|
+
if charset:
|
|
283
|
+
xmp_metadata = xmp_metadata.decode(charset).encode()
|
|
284
|
+
metadata.xmp_metadata.append(xmp_metadata)
|
|
276
285
|
|
|
277
286
|
# Embedded files
|
|
278
287
|
attachments = metadata.attachments.copy()
|
|
279
288
|
if options['attachments']:
|
|
289
|
+
relationships = iter(options['attachment_relationships'] or [])
|
|
280
290
|
for attachment in options['attachments']:
|
|
281
291
|
if not isinstance(attachment, Attachment):
|
|
282
292
|
attachment = Attachment(
|
|
283
|
-
attachment, url_fetcher=document.url_fetcher
|
|
293
|
+
attachment, url_fetcher=document.url_fetcher,
|
|
294
|
+
relationship=next(relationships, 'Unspecified'))
|
|
284
295
|
attachments.append(attachment)
|
|
285
296
|
pdf_attachments = []
|
|
286
297
|
for attachment in attachments:
|
weasyprint/pdf/anchors.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Insert anchors, links, bookmarks and inputs in PDFs."""
|
|
2
2
|
|
|
3
3
|
import collections
|
|
4
|
-
import io
|
|
5
4
|
import mimetypes
|
|
6
5
|
from hashlib import md5
|
|
7
6
|
from os.path import basename
|
|
@@ -330,18 +329,12 @@ def write_pdf_attachment(pdf, attachment, compress):
|
|
|
330
329
|
"""Write an attachment to the PDF stream."""
|
|
331
330
|
# Attachments from document links like <link> or <a> can only be URLs.
|
|
332
331
|
# They're passed in as tuples
|
|
333
|
-
url = None
|
|
334
|
-
uncompressed_length = 0
|
|
335
|
-
stream = b''
|
|
332
|
+
url = mime_type = None
|
|
336
333
|
try:
|
|
337
|
-
with attachment.source as (
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
source = io.BytesIO(source)
|
|
342
|
-
for data in iter(lambda: source.read(4096), b''):
|
|
343
|
-
uncompressed_length += len(data)
|
|
344
|
-
stream += data
|
|
334
|
+
with attachment.source as (file_obj, url, _, mime_type):
|
|
335
|
+
stream = file_obj.read()
|
|
336
|
+
if isinstance(stream, str):
|
|
337
|
+
stream = stream.encode()
|
|
345
338
|
except URLFetchingError as exception:
|
|
346
339
|
LOGGER.error('Failed to load attachment: %s', exception)
|
|
347
340
|
LOGGER.debug('Error while loading attachment:', exc_info=exception)
|
|
@@ -356,9 +349,10 @@ def write_pdf_attachment(pdf, attachment, compress):
|
|
|
356
349
|
filename = basename(unquote(urlsplit(url).path))
|
|
357
350
|
else:
|
|
358
351
|
filename = 'attachment.bin'
|
|
359
|
-
mime_type =
|
|
360
|
-
|
|
361
|
-
|
|
352
|
+
mime_type = (
|
|
353
|
+
mime_type or
|
|
354
|
+
mimetypes.guess_type(filename, strict=False)[0] or
|
|
355
|
+
'application/octet-stream')
|
|
362
356
|
|
|
363
357
|
creation = pydyf.String(attachment.created.strftime('D:%Y%m%d%H%M%SZ'))
|
|
364
358
|
mod = pydyf.String(attachment.modified.strftime('D:%Y%m%d%H%M%SZ'))
|
|
@@ -367,7 +361,7 @@ def write_pdf_attachment(pdf, attachment, compress):
|
|
|
367
361
|
'Subtype': f'/{mime_type.replace("/", "#2f")}',
|
|
368
362
|
'Params': pydyf.Dictionary({
|
|
369
363
|
'CheckSum': f'<{attachment.md5}>',
|
|
370
|
-
'Size':
|
|
364
|
+
'Size': len(stream),
|
|
371
365
|
'CreationDate': creation,
|
|
372
366
|
'ModDate': mod,
|
|
373
367
|
})
|
weasyprint/pdf/fonts.py
CHANGED
|
@@ -618,14 +618,23 @@ def _build_vector_font_dictionary(font_dictionary, pdf, font, widths, compress,
|
|
|
618
618
|
if font.missing:
|
|
619
619
|
# Add CMap that doesn’t include missing glyphs, so that they can be replaced by
|
|
620
620
|
# .notdef.
|
|
621
|
+
cmap_extra = pydyf.Dictionary({
|
|
622
|
+
'Type': '/CMap',
|
|
623
|
+
'CMapName': '/WP-Encod-0',
|
|
624
|
+
'CIDSystemInfo': pydyf.Dictionary({
|
|
625
|
+
'Registry': pydyf.String('Adobe'),
|
|
626
|
+
'Ordering': pydyf.String('Identity'),
|
|
627
|
+
'Supplement': 0,
|
|
628
|
+
}),
|
|
629
|
+
})
|
|
621
630
|
encoding = pydyf.Stream([
|
|
622
631
|
b'/CIDInit /ProcSet findresource begin',
|
|
623
632
|
b'12 dict begin',
|
|
624
633
|
b'begincmap',
|
|
625
634
|
b'/CIDSystemInfo',
|
|
626
635
|
b'3 dict dup begin',
|
|
627
|
-
b'/Registry (
|
|
628
|
-
b'/Ordering (
|
|
636
|
+
b'/Registry (Adobe) def',
|
|
637
|
+
b'/Ordering (Identity) def',
|
|
629
638
|
b'/Supplement 0 def',
|
|
630
639
|
b'end def',
|
|
631
640
|
b'/CMapName /WP-Encod-0 def',
|
|
@@ -633,7 +642,7 @@ def _build_vector_font_dictionary(font_dictionary, pdf, font, widths, compress,
|
|
|
633
642
|
b'1 begincodespacerange',
|
|
634
643
|
b'<0000> <ffff>',
|
|
635
644
|
b'endcodespacerange',
|
|
636
|
-
], compress=compress)
|
|
645
|
+
], cmap_extra, compress=compress)
|
|
637
646
|
available = tuple(font.to_unicode)
|
|
638
647
|
available_length = len(available)
|
|
639
648
|
for i in range(ceil(available_length / 100)):
|
weasyprint/pdf/metadata.py
CHANGED
|
@@ -11,6 +11,7 @@ from .. import __version__
|
|
|
11
11
|
NS = {
|
|
12
12
|
'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
13
13
|
'dc': 'http://purl.org/dc/elements/1.1/',
|
|
14
|
+
'': '',
|
|
14
15
|
'xmp': 'http://ns.adobe.com/xap/1.0/',
|
|
15
16
|
'xmpMM': 'http://ns.adobe.com/xap/1.0/mm/',
|
|
16
17
|
'pdf': 'http://ns.adobe.com/pdf/1.3/',
|
|
@@ -23,110 +24,164 @@ for key, value in NS.items():
|
|
|
23
24
|
register_namespace(key, value)
|
|
24
25
|
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
Described in ISO-32000-1:2008, 14.3.2.
|
|
27
|
+
class DocumentMetadata:
|
|
28
|
+
"""Meta-information belonging to a whole :class:`Document`.
|
|
30
29
|
|
|
30
|
+
New attributes may be added in future versions of WeasyPrint.
|
|
31
31
|
"""
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
32
|
+
def __init__(self, title=None, authors=None, description=None, keywords=None,
|
|
33
|
+
generator=None, created=None, modified=None, attachments=None,
|
|
34
|
+
lang=None, custom=None, xmp_metadata=None):
|
|
35
|
+
#: The title of the document, as a string or :obj:`None`.
|
|
36
|
+
#: Extracted from the ``<title>`` element in HTML
|
|
37
|
+
#: and written to the ``/Title`` info field in PDF.
|
|
38
|
+
self.title = title
|
|
39
|
+
#: The authors of the document, as a list of strings.
|
|
40
|
+
#: (Defaults to the empty list.)
|
|
41
|
+
#: Extracted from the ``<meta name=author>`` elements in HTML
|
|
42
|
+
#: and written to the ``/Author`` info field in PDF.
|
|
43
|
+
self.authors = authors or []
|
|
44
|
+
#: The description of the document, as a string or :obj:`None`.
|
|
45
|
+
#: Extracted from the ``<meta name=description>`` element in HTML
|
|
46
|
+
#: and written to the ``/Subject`` info field in PDF.
|
|
47
|
+
self.description = description
|
|
48
|
+
#: Keywords associated with the document, as a list of strings.
|
|
49
|
+
#: (Defaults to the empty list.)
|
|
50
|
+
#: Extracted from ``<meta name=keywords>`` elements in HTML
|
|
51
|
+
#: and written to the ``/Keywords`` info field in PDF.
|
|
52
|
+
self.keywords = keywords or []
|
|
53
|
+
#: The name of one of the software packages
|
|
54
|
+
#: used to generate the document, as a string or :obj:`None`.
|
|
55
|
+
#: Extracted from the ``<meta name=generator>`` element in HTML
|
|
56
|
+
#: and written to the ``/Creator`` info field in PDF.
|
|
57
|
+
self.generator = generator
|
|
58
|
+
#: The creation date of the document, as a string or :obj:`None`.
|
|
59
|
+
#: Dates are in one of the six formats specified in
|
|
60
|
+
#: `W3C’s profile of ISO 8601 <https://www.w3.org/TR/NOTE-datetime>`_.
|
|
61
|
+
#: Extracted from the ``<meta name=dcterms.created>`` element in HTML
|
|
62
|
+
#: and written to the ``/CreationDate`` info field in PDF.
|
|
63
|
+
self.created = created
|
|
64
|
+
#: The modification date of the document, as a string or :obj:`None`.
|
|
65
|
+
#: Dates are in one of the six formats specified in
|
|
66
|
+
#: `W3C’s profile of ISO 8601 <https://www.w3.org/TR/NOTE-datetime>`_.
|
|
67
|
+
#: Extracted from the ``<meta name=dcterms.modified>`` element in HTML
|
|
68
|
+
#: and written to the ``/ModDate`` info field in PDF.
|
|
69
|
+
self.modified = modified
|
|
70
|
+
#: A list of :class:`attachments <weasyprint.Attachment>`, empty by default.
|
|
71
|
+
#: Extracted from the ``<link rel=attachment>`` elements in HTML
|
|
72
|
+
#: and written to the ``/EmbeddedFiles`` dictionary in PDF.
|
|
73
|
+
self.attachments = attachments or []
|
|
74
|
+
#: Document language as BCP 47 language tags.
|
|
75
|
+
#: Extracted from ``<html lang=lang>`` in HTML.
|
|
76
|
+
self.lang = lang
|
|
77
|
+
#: Custom metadata, as a dict whose keys are the metadata names and
|
|
78
|
+
#: values are the metadata values.
|
|
79
|
+
self.custom = custom or {}
|
|
80
|
+
#: A list of XML bytestrings to add into the XMP metadata.
|
|
81
|
+
self.xmp_metadata = xmp_metadata or []
|
|
40
82
|
|
|
41
83
|
|
|
42
|
-
def
|
|
43
|
-
|
|
84
|
+
def include_in_pdf(self, pdf, variant, version, conformance, compress):
|
|
85
|
+
"""Add PDF stream of metadata.
|
|
44
86
|
|
|
45
|
-
|
|
87
|
+
Described in ISO-32000-1:2008, 14.3.2.
|
|
46
88
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
89
|
+
"""
|
|
90
|
+
header = b'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>\n'
|
|
91
|
+
header += b'<x:xmpmeta xmlns:x="adobe:ns:meta/">'
|
|
92
|
+
footer = b'</x:xmpmeta>\n<?xpacket end="r"?>'
|
|
93
|
+
xml_data = self.generate_rdf_metadata(variant, version, conformance)
|
|
94
|
+
stream_content = b'\n'.join((header, xml_data, *self.xmp_metadata, footer))
|
|
95
|
+
extra = {'Type': '/Metadata', 'Subtype': '/XML'}
|
|
96
|
+
metadata = pydyf.Stream([stream_content], extra, compress)
|
|
97
|
+
pdf.add_object(metadata)
|
|
98
|
+
pdf.catalog['Metadata'] = metadata.reference
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def generate_rdf_metadata(self, variant, version, conformance):
|
|
102
|
+
"""Generate RDF metadata as a bytestring."""
|
|
103
|
+
namespace = f'pdf{variant}id'
|
|
104
|
+
rdf = Element(f'{{{NS["rdf"]}}}RDF')
|
|
105
|
+
|
|
106
|
+
if version:
|
|
107
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
108
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
109
|
+
element.attrib[f'{{{NS[namespace]}}}part'] = str(version)
|
|
110
|
+
if conformance:
|
|
111
|
+
assert version
|
|
112
|
+
if variant == 'x':
|
|
113
|
+
for key in (
|
|
114
|
+
f'{{{NS["pdfxid"]}}}GTS_PDFXVersion',
|
|
115
|
+
f'{{{NS["pdfx"]}}}GTS_PDFXVersion',
|
|
116
|
+
f'{{{NS["pdfx"]}}}GTS_PDFXConformance',
|
|
117
|
+
):
|
|
118
|
+
subelement = SubElement(element, key)
|
|
119
|
+
subelement.text = conformance
|
|
120
|
+
subelement = SubElement(element, f'{{{NS["pdf"]}}}Trapped')
|
|
121
|
+
subelement.text = 'False'
|
|
122
|
+
if version >= 4:
|
|
123
|
+
# TODO: these values could be useful instead of using random values.
|
|
124
|
+
assert self.modified
|
|
125
|
+
subelement = SubElement(element, f'{{{NS["xmp"]}}}MetadataDate')
|
|
126
|
+
subelement.text = self.modified
|
|
127
|
+
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}DocumentID')
|
|
128
|
+
subelement.text = f'xmp.did:{uuid4()}'
|
|
129
|
+
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}RenditionClass')
|
|
130
|
+
subelement.text = 'proof:pdf'
|
|
131
|
+
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}VersionID')
|
|
132
|
+
subelement.text = '1'
|
|
133
|
+
else:
|
|
134
|
+
element.attrib[f'{{{NS[namespace]}}}conformance'] = conformance
|
|
135
|
+
if variant == 'a' and version == 4:
|
|
136
|
+
subelement = SubElement(element, f'{{{NS["pdfaid"]}}}rev')
|
|
137
|
+
subelement.text = '2020'
|
|
50
138
|
|
|
51
|
-
if version:
|
|
52
|
-
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
53
|
-
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
54
|
-
element.attrib[f'{{{NS[namespace]}}}part'] = str(version)
|
|
55
|
-
if conformance:
|
|
56
|
-
assert version
|
|
57
|
-
if variant == 'x':
|
|
58
|
-
for key in (
|
|
59
|
-
f'{{{NS["pdfxid"]}}}GTS_PDFXVersion',
|
|
60
|
-
f'{{{NS["pdfx"]}}}GTS_PDFXVersion',
|
|
61
|
-
f'{{{NS["pdfx"]}}}GTS_PDFXConformance',
|
|
62
|
-
):
|
|
63
|
-
subelement = SubElement(element, key)
|
|
64
|
-
subelement.text = conformance
|
|
65
|
-
subelement = SubElement(element, f'{{{NS["pdf"]}}}Trapped')
|
|
66
|
-
subelement.text = 'False'
|
|
67
|
-
if version >= 4:
|
|
68
|
-
# TODO: these values could be useful instead of using random values.
|
|
69
|
-
assert metadata.modified
|
|
70
|
-
subelement = SubElement(element, f'{{{NS["xmp"]}}}MetadataDate')
|
|
71
|
-
subelement.text = metadata.modified
|
|
72
|
-
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}DocumentID')
|
|
73
|
-
subelement.text = f'xmp.did:{uuid4()}'
|
|
74
|
-
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}RenditionClass')
|
|
75
|
-
subelement.text = 'proof:pdf'
|
|
76
|
-
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}VersionID')
|
|
77
|
-
subelement.text = '1'
|
|
78
|
-
else:
|
|
79
|
-
element.attrib[f'{{{NS[namespace]}}}conformance'] = conformance
|
|
80
|
-
if variant == 'a' and version == 4:
|
|
81
|
-
subelement = SubElement(element, f'{{{NS["pdfaid"]}}}rev')
|
|
82
|
-
subelement.text = '2020'
|
|
83
|
-
|
|
84
|
-
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
85
|
-
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
86
|
-
element.attrib[f'{{{NS["pdf"]}}}Producer'] = f'WeasyPrint {__version__}'
|
|
87
|
-
|
|
88
|
-
if metadata.title:
|
|
89
|
-
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
90
|
-
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
91
|
-
element = SubElement(element, f'{{{NS["dc"]}}}title')
|
|
92
|
-
element = SubElement(element, f'{{{NS["rdf"]}}}Alt')
|
|
93
|
-
element = SubElement(element, f'{{{NS["rdf"]}}}li')
|
|
94
|
-
element.attrib['xml:lang'] = 'x-default'
|
|
95
|
-
element.text = metadata.title
|
|
96
|
-
if metadata.authors:
|
|
97
|
-
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
98
|
-
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
99
|
-
element = SubElement(element, f'{{{NS["dc"]}}}creator')
|
|
100
|
-
element = SubElement(element, f'{{{NS["rdf"]}}}Seq')
|
|
101
|
-
for author in metadata.authors:
|
|
102
|
-
author_element = SubElement(element, f'{{{NS["rdf"]}}}li')
|
|
103
|
-
author_element.text = author
|
|
104
|
-
if metadata.description:
|
|
105
|
-
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
106
|
-
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
107
|
-
element = SubElement(element, f'{{{NS["dc"]}}}subject')
|
|
108
|
-
element = SubElement(element, f'{{{NS["rdf"]}}}Bag')
|
|
109
|
-
element = SubElement(element, f'{{{NS["rdf"]}}}li')
|
|
110
|
-
element.attrib['xml:lang'] = 'x-default'
|
|
111
|
-
element.text = metadata.description
|
|
112
|
-
if metadata.keywords:
|
|
113
|
-
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
114
|
-
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
115
|
-
element = SubElement(element, f'{{{NS["pdf"]}}}Keywords')
|
|
116
|
-
element.text = ', '.join(metadata.keywords)
|
|
117
|
-
if metadata.generator:
|
|
118
|
-
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
119
|
-
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
120
|
-
element = SubElement(element, f'{{{NS["xmp"]}}}CreatorTool')
|
|
121
|
-
element.text = metadata.generator
|
|
122
|
-
if metadata.created:
|
|
123
|
-
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
124
|
-
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
125
|
-
element = SubElement(element, f'{{{NS["xmp"]}}}CreateDate')
|
|
126
|
-
element.text = metadata.created
|
|
127
|
-
if metadata.modified:
|
|
128
139
|
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
129
140
|
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
130
|
-
element
|
|
131
|
-
|
|
132
|
-
|
|
141
|
+
element.attrib[f'{{{NS["pdf"]}}}Producer'] = f'WeasyPrint {__version__}'
|
|
142
|
+
|
|
143
|
+
if self.title:
|
|
144
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
145
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
146
|
+
element = SubElement(element, f'{{{NS["dc"]}}}title')
|
|
147
|
+
element = SubElement(element, f'{{{NS["rdf"]}}}Alt')
|
|
148
|
+
element = SubElement(element, f'{{{NS["rdf"]}}}li')
|
|
149
|
+
element.attrib['xml:lang'] = 'x-default'
|
|
150
|
+
element.text = self.title
|
|
151
|
+
if self.authors:
|
|
152
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
153
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
154
|
+
element = SubElement(element, f'{{{NS["dc"]}}}creator')
|
|
155
|
+
element = SubElement(element, f'{{{NS["rdf"]}}}Seq')
|
|
156
|
+
for author in self.authors:
|
|
157
|
+
author_element = SubElement(element, f'{{{NS["rdf"]}}}li')
|
|
158
|
+
author_element.text = author
|
|
159
|
+
if self.description:
|
|
160
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
161
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
162
|
+
element = SubElement(element, f'{{{NS["dc"]}}}subject')
|
|
163
|
+
element = SubElement(element, f'{{{NS["rdf"]}}}Bag')
|
|
164
|
+
element = SubElement(element, f'{{{NS["rdf"]}}}li')
|
|
165
|
+
element.attrib['xml:lang'] = 'x-default'
|
|
166
|
+
element.text = self.description
|
|
167
|
+
if self.keywords:
|
|
168
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
169
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
170
|
+
element = SubElement(element, f'{{{NS["pdf"]}}}Keywords')
|
|
171
|
+
element.text = ', '.join(self.keywords)
|
|
172
|
+
if self.generator:
|
|
173
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
174
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
175
|
+
element = SubElement(element, f'{{{NS["xmp"]}}}CreatorTool')
|
|
176
|
+
element.text = self.generator
|
|
177
|
+
if self.created:
|
|
178
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
179
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
180
|
+
element = SubElement(element, f'{{{NS["xmp"]}}}CreateDate')
|
|
181
|
+
element.text = self.created
|
|
182
|
+
if self.modified:
|
|
183
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
184
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
185
|
+
element = SubElement(element, f'{{{NS["xmp"]}}}ModifyDate')
|
|
186
|
+
element.text = self.modified
|
|
187
|
+
return tostring(rdf, encoding='utf-8')
|
weasyprint/pdf/pdfa.py
CHANGED
|
@@ -4,8 +4,6 @@ from functools import partial
|
|
|
4
4
|
|
|
5
5
|
import pydyf
|
|
6
6
|
|
|
7
|
-
from .metadata import add_metadata
|
|
8
|
-
|
|
9
7
|
|
|
10
8
|
def pdfa(pdf, metadata, document, page_streams, attachments, compress,
|
|
11
9
|
version, variant):
|
|
@@ -65,7 +63,7 @@ def pdfa(pdf, metadata, document, page_streams, attachments, compress,
|
|
|
65
63
|
if version == 1:
|
|
66
64
|
# Metadata compression is forbidden for version 1.
|
|
67
65
|
compress = False
|
|
68
|
-
|
|
66
|
+
metadata.include_in_pdf(pdf, 'a', version, variant, compress)
|
|
69
67
|
|
|
70
68
|
# Remove document information.
|
|
71
69
|
if version >= 4:
|
weasyprint/pdf/pdfua.py
CHANGED
|
@@ -2,13 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
from functools import partial
|
|
4
4
|
|
|
5
|
-
from .metadata import add_metadata
|
|
6
|
-
|
|
7
5
|
|
|
8
6
|
def pdfua(pdf, metadata, document, page_streams, attachments, compress, version):
|
|
9
7
|
"""Set metadata for PDF/UA documents."""
|
|
10
8
|
# Common PDF metadata stream
|
|
11
|
-
|
|
9
|
+
metadata.include_in_pdf(pdf, 'ua', version, conformance=None, compress=compress)
|
|
12
10
|
|
|
13
11
|
|
|
14
12
|
VARIANTS = {
|
weasyprint/pdf/pdfx.py
CHANGED
|
@@ -5,8 +5,6 @@ from time import localtime
|
|
|
5
5
|
|
|
6
6
|
import pydyf
|
|
7
7
|
|
|
8
|
-
from .metadata import add_metadata
|
|
9
|
-
|
|
10
8
|
|
|
11
9
|
def pdfx(pdf, metadata, document, page_streams, attachments, compress, version,
|
|
12
10
|
variant):
|
|
@@ -47,7 +45,7 @@ def pdfx(pdf, metadata, document, page_streams, attachments, compress, version,
|
|
|
47
45
|
])
|
|
48
46
|
|
|
49
47
|
# Common PDF metadata stream.
|
|
50
|
-
|
|
48
|
+
metadata.include_in_pdf(pdf, 'x', version, conformance, compress=compress)
|
|
51
49
|
|
|
52
50
|
|
|
53
51
|
VARIANTS = {
|