weasyprint 65.1__py3-none-any.whl → 67.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weasyprint/__init__.py +17 -7
- weasyprint/__main__.py +21 -10
- weasyprint/anchors.py +4 -4
- weasyprint/css/__init__.py +732 -67
- weasyprint/css/computed_values.py +65 -170
- weasyprint/css/counters.py +1 -1
- weasyprint/css/functions.py +206 -0
- weasyprint/css/html5_ua.css +3 -7
- weasyprint/css/html5_ua_form.css +2 -2
- weasyprint/css/media_queries.py +3 -1
- weasyprint/css/properties.py +6 -2
- weasyprint/css/{utils.py → tokens.py} +306 -397
- weasyprint/css/units.py +91 -0
- weasyprint/css/validation/__init__.py +1 -1
- weasyprint/css/validation/descriptors.py +47 -19
- weasyprint/css/validation/expanders.py +7 -8
- weasyprint/css/validation/properties.py +341 -357
- weasyprint/document.py +20 -19
- weasyprint/draw/__init__.py +56 -63
- weasyprint/draw/border.py +121 -69
- weasyprint/draw/color.py +1 -1
- weasyprint/draw/text.py +60 -41
- weasyprint/formatting_structure/boxes.py +24 -5
- weasyprint/formatting_structure/build.py +33 -45
- weasyprint/images.py +76 -62
- weasyprint/layout/__init__.py +32 -26
- weasyprint/layout/absolute.py +7 -6
- weasyprint/layout/background.py +7 -7
- weasyprint/layout/block.py +195 -152
- weasyprint/layout/column.py +19 -24
- weasyprint/layout/flex.py +54 -26
- weasyprint/layout/float.py +12 -7
- weasyprint/layout/grid.py +284 -90
- weasyprint/layout/inline.py +121 -68
- weasyprint/layout/page.py +45 -12
- weasyprint/layout/percent.py +14 -10
- weasyprint/layout/preferred.py +105 -63
- weasyprint/layout/replaced.py +9 -6
- weasyprint/layout/table.py +16 -9
- weasyprint/pdf/__init__.py +58 -18
- weasyprint/pdf/anchors.py +3 -4
- weasyprint/pdf/fonts.py +126 -69
- weasyprint/pdf/metadata.py +36 -4
- weasyprint/pdf/pdfa.py +19 -3
- weasyprint/pdf/pdfua.py +7 -115
- weasyprint/pdf/pdfx.py +83 -0
- weasyprint/pdf/stream.py +57 -49
- weasyprint/pdf/tags.py +307 -0
- weasyprint/stacking.py +14 -15
- weasyprint/svg/__init__.py +59 -32
- weasyprint/svg/bounding_box.py +4 -2
- weasyprint/svg/defs.py +4 -9
- weasyprint/svg/images.py +11 -3
- weasyprint/svg/text.py +11 -2
- weasyprint/svg/utils.py +15 -8
- weasyprint/text/constants.py +1 -1
- weasyprint/text/ffi.py +4 -3
- weasyprint/text/fonts.py +13 -5
- weasyprint/text/line_break.py +146 -43
- weasyprint/urls.py +41 -13
- {weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/METADATA +5 -6
- weasyprint-67.0.dist-info/RECORD +77 -0
- weasyprint/draw/stack.py +0 -13
- weasyprint-65.1.dist-info/RECORD +0 -74
- {weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/WHEEL +0 -0
- {weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/entry_points.txt +0 -0
- {weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/licenses/LICENSE +0 -0
weasyprint/pdf/anchors.py
CHANGED
|
@@ -16,7 +16,7 @@ from ..text.fonts import get_font_description
|
|
|
16
16
|
from ..urls import URLFetchingError
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def add_links(links_and_anchors, matrix, pdf, page, names,
|
|
19
|
+
def add_links(links_and_anchors, matrix, pdf, page, names, tags):
|
|
20
20
|
"""Include hyperlinks in given PDF page."""
|
|
21
21
|
links, anchors = links_and_anchors
|
|
22
22
|
|
|
@@ -30,7 +30,7 @@ def add_links(links_and_anchors, matrix, pdf, page, names, mark):
|
|
|
30
30
|
'Rect': pydyf.Array([x1, y1, x2, y2]),
|
|
31
31
|
'BS': pydyf.Dictionary({'W': 0}),
|
|
32
32
|
})
|
|
33
|
-
if
|
|
33
|
+
if tags is not None:
|
|
34
34
|
box.link_annotation['Contents'] = pydyf.String(link_target)
|
|
35
35
|
if link_type == 'internal':
|
|
36
36
|
box.link_annotation['Dest'] = pydyf.String(link_target)
|
|
@@ -233,7 +233,7 @@ def add_forms(forms, matrix, pdf, page, resources, stream, font_map):
|
|
|
233
233
|
flags = 1 << (3 - 1) # HTML form format
|
|
234
234
|
if form.attrib.get('method', '').lower() != 'post':
|
|
235
235
|
flags += 1 << (4 - 1) # GET method
|
|
236
|
-
fields = pydyf.Array(
|
|
236
|
+
fields = pydyf.Array(field.reference for field in forms[form].values())
|
|
237
237
|
field['FT'] = '/Btn'
|
|
238
238
|
field['DA'] = pydyf.String(b' '.join(field_stream.stream))
|
|
239
239
|
field['V'] = pydyf.String(form.attrib.get('value', ''))
|
|
@@ -276,7 +276,6 @@ def add_forms(forms, matrix, pdf, page, resources, stream, font_map):
|
|
|
276
276
|
forms[form][input_name] = field
|
|
277
277
|
|
|
278
278
|
|
|
279
|
-
|
|
280
279
|
def add_annotations(links, matrix, document, pdf, page, annot_files, compress):
|
|
281
280
|
"""Include annotations in PDF."""
|
|
282
281
|
# TODO: splitting a link into multiple independent rectangular
|
weasyprint/pdf/fonts.py
CHANGED
|
@@ -9,7 +9,7 @@ from math import ceil
|
|
|
9
9
|
import pydyf
|
|
10
10
|
from fontTools import subset
|
|
11
11
|
from fontTools.ttLib import TTFont, TTLibError, ttFont
|
|
12
|
-
from fontTools.varLib.
|
|
12
|
+
from fontTools.varLib.instancer import instantiateVariableFont
|
|
13
13
|
|
|
14
14
|
from ..logger import LOGGER, capture_logs
|
|
15
15
|
from ..text.constants import PANGO_STRETCH_PERCENT
|
|
@@ -26,7 +26,8 @@ class Font:
|
|
|
26
26
|
|
|
27
27
|
self.font_size = font_size
|
|
28
28
|
self.style = pango.pango_font_description_get_style(description)
|
|
29
|
-
self.family = ffi.string(
|
|
29
|
+
self.family = ffi.string(
|
|
30
|
+
pango.pango_font_description_get_family(description)).decode()
|
|
30
31
|
|
|
31
32
|
self.variations = {}
|
|
32
33
|
variations = pango.pango_font_description_get_variations(description)
|
|
@@ -97,10 +98,12 @@ class Font:
|
|
|
97
98
|
self.upem = harfbuzz.hb_face_get_upem(self.hb_face)
|
|
98
99
|
self.png = harfbuzz.hb_ot_color_has_png(self.hb_face)
|
|
99
100
|
self.svg = harfbuzz.hb_ot_color_has_svg(self.hb_face)
|
|
101
|
+
self.glyph_count = harfbuzz.hb_face_get_glyph_count(self.hb_face)
|
|
100
102
|
self.stemv = 80
|
|
101
103
|
self.stemh = 80
|
|
102
104
|
self.widths = {}
|
|
103
|
-
self.
|
|
105
|
+
self.to_unicode = {}
|
|
106
|
+
self.missing = {}
|
|
104
107
|
self.used_in_forms = False
|
|
105
108
|
|
|
106
109
|
# Set font flags.
|
|
@@ -110,42 +113,50 @@ class Font:
|
|
|
110
113
|
if b'Serif' in name.split(b' '):
|
|
111
114
|
self.flags += 2 ** (2 - 1) # Serif
|
|
112
115
|
|
|
113
|
-
def
|
|
116
|
+
def get_unused_glyph_id(self, codepoint):
|
|
117
|
+
"""Get a glyph id that’s not used in the font, for given Unicode codepoint."""
|
|
118
|
+
if codepoint not in self.missing:
|
|
119
|
+
next_unused_glyph_id = self.glyph_count + len(self.missing)
|
|
120
|
+
if next_unused_glyph_id > 2 ** 16 - 1:
|
|
121
|
+
LOGGER.warning(
|
|
122
|
+
f'Too many glyphs missing from "{self.family}", '
|
|
123
|
+
'expect text selection problems')
|
|
124
|
+
next_unused_glyph_id = 2 ** 16 - 1
|
|
125
|
+
self.missing[codepoint] = next_unused_glyph_id
|
|
126
|
+
return self.missing[codepoint]
|
|
127
|
+
|
|
128
|
+
def clean(self, to_unicode, hinting):
|
|
114
129
|
"""Remove useless data from font."""
|
|
115
130
|
|
|
116
131
|
# Subset font.
|
|
117
|
-
self.subset(
|
|
132
|
+
self.subset(to_unicode, hinting)
|
|
118
133
|
|
|
119
134
|
# Transform variable into static font.
|
|
120
135
|
if 'fvar' in self.tables:
|
|
121
136
|
full_font = io.BytesIO(self.file_content)
|
|
122
137
|
ttfont = TTFont(full_font, fontNumber=self.index)
|
|
123
|
-
|
|
138
|
+
axes = {axis.axisTag: axis for axis in ttfont['fvar'].axes}
|
|
139
|
+
if 'wght' in axes and 'wght' not in self.variations:
|
|
124
140
|
self.variations['wght'] = self.weight
|
|
125
|
-
if 'opsz' not in self.variations:
|
|
141
|
+
if 'opsz' in axes and 'opsz' not in self.variations:
|
|
126
142
|
self.variations['opsz'] = self.font_size
|
|
127
|
-
if 'slnt' not in self.variations:
|
|
143
|
+
if 'slnt' in axes and 'slnt' not in self.variations:
|
|
128
144
|
slnt = 0
|
|
129
145
|
if self.style == 1:
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
else:
|
|
135
|
-
slnt = axe.maxValue
|
|
136
|
-
break
|
|
146
|
+
if axes['slnt'].maxValue == 0:
|
|
147
|
+
slnt = axes['slnt'].minValue
|
|
148
|
+
else:
|
|
149
|
+
slnt = axes['slnt'].maxValue
|
|
137
150
|
self.variations['slnt'] = slnt
|
|
138
|
-
if 'ital' not in self.variations:
|
|
151
|
+
if 'ital' in axes and 'ital' not in self.variations:
|
|
139
152
|
self.variations['ital'] = int(self.style == 2)
|
|
140
153
|
partial_font = io.BytesIO()
|
|
141
154
|
try:
|
|
142
|
-
ttfont = instantiateVariableFont(ttfont, self.variations)
|
|
143
|
-
for key, (advance, bearing) in ttfont['hmtx'].metrics.items():
|
|
144
|
-
if advance < 0:
|
|
145
|
-
ttfont['hmtx'].metrics[key] = (0, bearing)
|
|
155
|
+
ttfont = instantiateVariableFont(ttfont, self.variations, static=True)
|
|
146
156
|
ttfont.save(partial_font)
|
|
147
|
-
except Exception:
|
|
148
|
-
LOGGER.warning('Unable to
|
|
157
|
+
except Exception as exception:
|
|
158
|
+
LOGGER.warning(f'Unable to instantiate "{self.family}" variable font')
|
|
159
|
+
LOGGER.debug('Original exception:', exc_info=exception)
|
|
149
160
|
else:
|
|
150
161
|
self.file_content = partial_font.getvalue()
|
|
151
162
|
|
|
@@ -171,25 +182,26 @@ class Font:
|
|
|
171
182
|
output_font = io.BytesIO()
|
|
172
183
|
ttfont.save(output_font)
|
|
173
184
|
self.file_content = output_font.getvalue()
|
|
174
|
-
except TTLibError:
|
|
175
|
-
LOGGER.warning('Unable to save emoji font')
|
|
185
|
+
except TTLibError as exception:
|
|
186
|
+
LOGGER.warning(f'Unable to save emoji font "{self.family}"')
|
|
187
|
+
LOGGER.debug('Original exception:', exc_info=exception)
|
|
176
188
|
|
|
177
189
|
@property
|
|
178
190
|
def type(self):
|
|
179
191
|
return 'otf' if self.file_content[:4] == b'OTTO' else 'ttf'
|
|
180
192
|
|
|
181
|
-
def subset(self,
|
|
193
|
+
def subset(self, to_unicode, hinting):
|
|
182
194
|
"""Remove unused glyphs and tables from font."""
|
|
183
|
-
if not
|
|
195
|
+
if not to_unicode:
|
|
184
196
|
return
|
|
185
197
|
|
|
186
198
|
if harfbuzz_subset and harfbuzz.hb_version_atleast(4, 1, 0):
|
|
187
199
|
# 4.1.0 is required for hb_set_add_sorted_array.
|
|
188
|
-
self._harfbuzz_subset(
|
|
200
|
+
self._harfbuzz_subset(to_unicode, hinting)
|
|
189
201
|
else:
|
|
190
|
-
self._fonttools_subset(
|
|
202
|
+
self._fonttools_subset(to_unicode, hinting)
|
|
191
203
|
|
|
192
|
-
def _harfbuzz_subset(self,
|
|
204
|
+
def _harfbuzz_subset(self, to_unicode, hinting):
|
|
193
205
|
"""Subset font using Harfbuzz."""
|
|
194
206
|
hb_subset = ffi.gc(
|
|
195
207
|
harfbuzz_subset.hb_subset_input_create_or_fail(),
|
|
@@ -197,14 +209,16 @@ class Font:
|
|
|
197
209
|
|
|
198
210
|
# Only keep used glyphs.
|
|
199
211
|
gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset)
|
|
200
|
-
gid_array = ffi.new(f'hb_codepoint_t[{len(
|
|
201
|
-
harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(
|
|
212
|
+
gid_array = ffi.new(f'hb_codepoint_t[{len(to_unicode)}]', sorted(to_unicode))
|
|
213
|
+
harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(to_unicode))
|
|
202
214
|
|
|
203
215
|
# Set flags.
|
|
204
216
|
flags = (
|
|
205
217
|
harfbuzz_subset.HB_SUBSET_FLAGS_RETAIN_GIDS |
|
|
206
218
|
harfbuzz_subset.HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED |
|
|
207
219
|
harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE)
|
|
220
|
+
if self.missing:
|
|
221
|
+
flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NOTDEF_OUTLINE
|
|
208
222
|
harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags)
|
|
209
223
|
|
|
210
224
|
# Drop useless tables.
|
|
@@ -223,7 +237,7 @@ class Font:
|
|
|
223
237
|
|
|
224
238
|
# Drop empty glyphs after last one used.
|
|
225
239
|
gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset)
|
|
226
|
-
keep = tuple(range(max(
|
|
240
|
+
keep = tuple(range(max(to_unicode) + 1))
|
|
227
241
|
gid_array = ffi.new(f'hb_codepoint_t[{len(keep)}]', keep)
|
|
228
242
|
harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(keep))
|
|
229
243
|
|
|
@@ -233,6 +247,8 @@ class Font:
|
|
|
233
247
|
harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE)
|
|
234
248
|
if not hinting:
|
|
235
249
|
flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NO_HINTING
|
|
250
|
+
if self.missing:
|
|
251
|
+
flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NOTDEF_OUTLINE
|
|
236
252
|
harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags)
|
|
237
253
|
|
|
238
254
|
# Subset font.
|
|
@@ -247,19 +263,19 @@ class Font:
|
|
|
247
263
|
self.file_content = file_content
|
|
248
264
|
return
|
|
249
265
|
|
|
250
|
-
LOGGER.warning('Unable to subset
|
|
266
|
+
LOGGER.warning(f'Unable to subset "{self.family}" with HarfBuzz')
|
|
251
267
|
|
|
252
|
-
def _fonttools_subset(self,
|
|
268
|
+
def _fonttools_subset(self, to_unicode, hinting):
|
|
253
269
|
"""Subset font using Fonttools."""
|
|
254
270
|
full_font = io.BytesIO(self.file_content)
|
|
255
271
|
|
|
256
272
|
# Set subset options.
|
|
257
273
|
options = subset.Options(
|
|
258
274
|
retain_gids=True, passthrough_tables=True, ignore_missing_glyphs=True,
|
|
259
|
-
hinting=hinting, desubroutinize=True)
|
|
275
|
+
hinting=hinting, desubroutinize=True, notdef_outline=bool(self.missing))
|
|
260
276
|
options.drop_tables += ['GSUB', 'GPOS', 'SVG']
|
|
261
277
|
subsetter = subset.Subsetter(options)
|
|
262
|
-
subsetter.populate(gids=
|
|
278
|
+
subsetter.populate(gids=to_unicode)
|
|
263
279
|
|
|
264
280
|
# Subset font.
|
|
265
281
|
try:
|
|
@@ -268,10 +284,11 @@ class Font:
|
|
|
268
284
|
subsetter.subset(ttfont)
|
|
269
285
|
for log in logs:
|
|
270
286
|
LOGGER.warning(
|
|
271
|
-
'fontTools warning when subsetting
|
|
272
|
-
self.family
|
|
273
|
-
except TTLibError:
|
|
274
|
-
LOGGER.warning('Unable to subset
|
|
287
|
+
'fontTools warning when subsetting '
|
|
288
|
+
f'"{self.family}": {log}')
|
|
289
|
+
except TTLibError as exception:
|
|
290
|
+
LOGGER.warning(f'Unable to subset "{self.family}" with fontTools')
|
|
291
|
+
LOGGER.debug('Original exception:', exc_info=exception)
|
|
275
292
|
else:
|
|
276
293
|
optimized_font = io.BytesIO()
|
|
277
294
|
ttfont.save(optimized_font)
|
|
@@ -292,11 +309,11 @@ def build_fonts_dictionary(pdf, fonts, compress, subset, options):
|
|
|
292
309
|
continue
|
|
293
310
|
|
|
294
311
|
# Clean font, optimize and handle emojis.
|
|
295
|
-
|
|
312
|
+
to_unicode = {}
|
|
296
313
|
if subset and not font.used_in_forms:
|
|
297
314
|
for file_font in file_fonts:
|
|
298
|
-
|
|
299
|
-
font.clean(
|
|
315
|
+
to_unicode = {**to_unicode, **file_font.to_unicode}
|
|
316
|
+
font.clean(to_unicode, options['hinting'])
|
|
300
317
|
|
|
301
318
|
# Include font.
|
|
302
319
|
if font.type == 'otf':
|
|
@@ -311,20 +328,20 @@ def build_fonts_dictionary(pdf, fonts, compress, subset, options):
|
|
|
311
328
|
if subset and not font.used_in_forms:
|
|
312
329
|
# Only store widths and map for used glyphs
|
|
313
330
|
font_widths = font.widths
|
|
314
|
-
|
|
331
|
+
to_unicode = font.to_unicode
|
|
315
332
|
else:
|
|
316
333
|
# Store width and Unicode map for all glyphs
|
|
317
334
|
full_font = io.BytesIO(font.file_content)
|
|
318
335
|
ttfont = TTFont(full_font, fontNumber=font.index)
|
|
319
|
-
font_widths,
|
|
336
|
+
font_widths, to_unicode = {}, {}
|
|
320
337
|
for i, glyph in enumerate(ttfont.getGlyphSet().values()):
|
|
321
338
|
font_widths[i] = glyph.width * 1000 / font.upem
|
|
322
339
|
for letter, key in ttfont.getBestCmap().items():
|
|
323
|
-
|
|
324
|
-
if
|
|
325
|
-
|
|
340
|
+
glyph_id = ttfont.getGlyphID(key)
|
|
341
|
+
if glyph_id not in to_unicode:
|
|
342
|
+
to_unicode[glyph_id] = chr(letter)
|
|
326
343
|
|
|
327
|
-
|
|
344
|
+
to_unicode_object = pydyf.Stream([
|
|
328
345
|
b'/CIDInit /ProcSet findresource begin',
|
|
329
346
|
b'12 dict begin',
|
|
330
347
|
b'begincmap',
|
|
@@ -338,28 +355,29 @@ def build_fonts_dictionary(pdf, fonts, compress, subset, options):
|
|
|
338
355
|
b'1 begincodespacerange',
|
|
339
356
|
b'<0000> <ffff>',
|
|
340
357
|
b'endcodespacerange'], compress=compress)
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
358
|
+
to_unicode_stream = to_unicode_object.stream
|
|
359
|
+
to_unicode_length = len(to_unicode)
|
|
360
|
+
to_unicode_items = tuple(to_unicode.items())
|
|
361
|
+
for i in range(ceil(to_unicode_length / 100)):
|
|
362
|
+
batch_length = min(100, to_unicode_length - i * 100)
|
|
363
|
+
to_unicode_stream.append(f'{batch_length} beginbfchar'.encode())
|
|
364
|
+
for glyph, text in to_unicode_items[i*100:(i+1)*100]:
|
|
347
365
|
unicode_codepoints = ''.join(
|
|
348
366
|
f'{letter.encode("utf-16-be").hex()}' for letter in text)
|
|
349
|
-
|
|
367
|
+
to_unicode_stream.append(
|
|
350
368
|
f'<{glyph:04x}> <{unicode_codepoints}>'.encode())
|
|
351
|
-
|
|
352
|
-
|
|
369
|
+
to_unicode_stream.append(b'endbfchar')
|
|
370
|
+
to_unicode_stream.extend([
|
|
353
371
|
b'endcmap',
|
|
354
372
|
b'CMapName currentdict /CMap defineresource pop',
|
|
355
373
|
b'end',
|
|
356
374
|
b'end'])
|
|
357
|
-
pdf.add_object(
|
|
375
|
+
pdf.add_object(to_unicode_object)
|
|
358
376
|
font_dictionary = pydyf.Dictionary({
|
|
359
377
|
'Type': '/Font',
|
|
360
378
|
'Subtype': f'/Type{3 if font.bitmap else 0}',
|
|
361
379
|
'BaseFont': font.name,
|
|
362
|
-
'ToUnicode':
|
|
380
|
+
'ToUnicode': to_unicode_object.reference,
|
|
363
381
|
})
|
|
364
382
|
|
|
365
383
|
if font.bitmap:
|
|
@@ -380,7 +398,7 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress,
|
|
|
380
398
|
font_dictionary['FontBBox'] = pydyf.Array([0, 0, 1, 1])
|
|
381
399
|
font_dictionary['FontMatrix'] = pydyf.Array([1, 0, 0, 1, 0, 0])
|
|
382
400
|
if subset:
|
|
383
|
-
chars = tuple(sorted(font.
|
|
401
|
+
chars = tuple(sorted(font.to_unicode))
|
|
384
402
|
else:
|
|
385
403
|
chars = tuple(range(256))
|
|
386
404
|
first, last = chars[0], chars[-1]
|
|
@@ -419,7 +437,8 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress,
|
|
|
419
437
|
bearing_y = subtable.metrics.horiBearingY
|
|
420
438
|
break
|
|
421
439
|
else:
|
|
422
|
-
LOGGER.warning(
|
|
440
|
+
LOGGER.warning(
|
|
441
|
+
f'Unknown bitmap metrics in "{font.family}" for glyph: {glyph_id}')
|
|
423
442
|
continue
|
|
424
443
|
else:
|
|
425
444
|
data_start = 5 if glyph_format in (1, 2, 8) else 8
|
|
@@ -466,11 +485,12 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress,
|
|
|
466
485
|
y = int.from_bytes(data[index+3:index+4], 'big', signed=True)
|
|
467
486
|
subglyphs.append({'id': subglyph_id, 'x': x, 'y': y})
|
|
468
487
|
else: # pragma: no cover
|
|
469
|
-
LOGGER.warning(
|
|
488
|
+
LOGGER.warning(
|
|
489
|
+
f'Unsupported bitmap glyph format in "{font.family}": {glyph_format}')
|
|
470
490
|
glyph_info['bitmap'] = bytes(height * stride)
|
|
471
491
|
|
|
472
492
|
for glyph_id, glyph_info in glyphs_info.items():
|
|
473
|
-
# Don’t store glyph not in
|
|
493
|
+
# Don’t store glyph not in to_unicode.
|
|
474
494
|
if glyph_id not in chars:
|
|
475
495
|
continue
|
|
476
496
|
|
|
@@ -488,12 +508,14 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress,
|
|
|
488
508
|
sub_y = subglyph['y']
|
|
489
509
|
sub_id = subglyph['id']
|
|
490
510
|
if sub_id not in glyphs_info:
|
|
491
|
-
LOGGER.warning(f'Unknown subglyph: {sub_id}')
|
|
511
|
+
LOGGER.warning(f'Unknown subglyph in "{font.family}": {sub_id}')
|
|
492
512
|
continue
|
|
493
513
|
subglyph = glyphs_info[sub_id]
|
|
494
514
|
if subglyph['bitmap'] is None:
|
|
495
515
|
# TODO: Support subglyph in subglyph.
|
|
496
|
-
LOGGER.warning(
|
|
516
|
+
LOGGER.warning(
|
|
517
|
+
'Unsupported subglyph in subglyph in '
|
|
518
|
+
f'"{font.family}": {sub_id}')
|
|
497
519
|
continue
|
|
498
520
|
for row_y in range(subglyph['height']):
|
|
499
521
|
row_slice = slice(
|
|
@@ -569,8 +591,6 @@ def _build_vector_font_dictionary(font_dictionary, pdf, font, widths, compress,
|
|
|
569
591
|
compress=compress)
|
|
570
592
|
pdf.add_object(stream)
|
|
571
593
|
font_descriptor['CIDSet'] = stream.reference
|
|
572
|
-
if font.type == 'otf':
|
|
573
|
-
font_descriptor['Subtype'] = '/OpenType'
|
|
574
594
|
pdf.add_object(font_descriptor)
|
|
575
595
|
|
|
576
596
|
pdf_widths = pydyf.Array()
|
|
@@ -595,5 +615,42 @@ def _build_vector_font_dictionary(font_dictionary, pdf, font, widths, compress,
|
|
|
595
615
|
'FontDescriptor': font_descriptor.reference,
|
|
596
616
|
})
|
|
597
617
|
pdf.add_object(subfont_dictionary)
|
|
598
|
-
|
|
618
|
+
if font.missing:
|
|
619
|
+
# Add CMap that doesn’t include missing glyphs, so that they can be replaced by
|
|
620
|
+
# .notdef.
|
|
621
|
+
encoding = pydyf.Stream([
|
|
622
|
+
b'/CIDInit /ProcSet findresource begin',
|
|
623
|
+
b'12 dict begin',
|
|
624
|
+
b'begincmap',
|
|
625
|
+
b'/CIDSystemInfo',
|
|
626
|
+
b'3 dict dup begin',
|
|
627
|
+
b'/Registry (WP) def',
|
|
628
|
+
b'/Ordering (Encod) def',
|
|
629
|
+
b'/Supplement 0 def',
|
|
630
|
+
b'end def',
|
|
631
|
+
b'/CMapName /WP-Encod-0 def',
|
|
632
|
+
b'/CMapType 1 def',
|
|
633
|
+
b'1 begincodespacerange',
|
|
634
|
+
b'<0000> <ffff>',
|
|
635
|
+
b'endcodespacerange',
|
|
636
|
+
], compress=compress)
|
|
637
|
+
available = tuple(font.to_unicode)
|
|
638
|
+
available_length = len(available)
|
|
639
|
+
for i in range(ceil(available_length / 100)):
|
|
640
|
+
batch_length = min(100, available_length - i * 100)
|
|
641
|
+
encoding.stream.append(f'{batch_length} begincidchar'.encode())
|
|
642
|
+
for glyph_id in available[i*100:(i+1)*100]:
|
|
643
|
+
font_glyph_id = 0 if glyph_id in font.missing.values() else glyph_id
|
|
644
|
+
encoding.stream.append(f'<{glyph_id:04x}> {font_glyph_id}'.encode())
|
|
645
|
+
encoding.stream.append(b'endcidchar')
|
|
646
|
+
encoding.stream.extend([
|
|
647
|
+
b'endcmap',
|
|
648
|
+
b'CMapName currentdict /CMap defineresource pop',
|
|
649
|
+
b'end',
|
|
650
|
+
b'end'])
|
|
651
|
+
pdf.add_object(encoding)
|
|
652
|
+
font_dictionary['Encoding'] = encoding.reference
|
|
653
|
+
else:
|
|
654
|
+
# No missing glyph in this font, use the identity mapping to map all glyphs.
|
|
655
|
+
font_dictionary['Encoding'] = '/Identity-H'
|
|
599
656
|
font_dictionary['DescendantFonts'] = pydyf.Array([subfont_dictionary.reference])
|
weasyprint/pdf/metadata.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""PDF metadata stream generation."""
|
|
2
2
|
|
|
3
|
+
from uuid import uuid4
|
|
3
4
|
from xml.etree.ElementTree import Element, SubElement, register_namespace, tostring
|
|
4
5
|
|
|
5
6
|
import pydyf
|
|
@@ -11,9 +12,12 @@ NS = {
|
|
|
11
12
|
'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
|
|
12
13
|
'dc': 'http://purl.org/dc/elements/1.1/',
|
|
13
14
|
'xmp': 'http://ns.adobe.com/xap/1.0/',
|
|
15
|
+
'xmpMM': 'http://ns.adobe.com/xap/1.0/mm/',
|
|
14
16
|
'pdf': 'http://ns.adobe.com/pdf/1.3/',
|
|
15
17
|
'pdfaid': 'http://www.aiim.org/pdfa/ns/id/',
|
|
16
18
|
'pdfuaid': 'http://www.aiim.org/pdfua/ns/id/',
|
|
19
|
+
'pdfxid': 'http://www.npes.org/pdfx/ns/id/',
|
|
20
|
+
'pdfx': 'http://ns.adobe.com/pdfx/1.3/',
|
|
17
21
|
}
|
|
18
22
|
for key, value in NS.items():
|
|
19
23
|
register_namespace(key, value)
|
|
@@ -44,11 +48,38 @@ def generate_rdf_metadata(metadata, variant, version, conformance):
|
|
|
44
48
|
namespace = f'pdf{variant}id'
|
|
45
49
|
rdf = Element(f'{{{NS["rdf"]}}}RDF')
|
|
46
50
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
51
|
+
if version:
|
|
52
|
+
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
53
|
+
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
54
|
+
element.attrib[f'{{{NS[namespace]}}}part'] = str(version)
|
|
50
55
|
if conformance:
|
|
51
|
-
|
|
56
|
+
assert version
|
|
57
|
+
if variant == 'x':
|
|
58
|
+
for key in (
|
|
59
|
+
f'{{{NS["pdfxid"]}}}GTS_PDFXVersion',
|
|
60
|
+
f'{{{NS["pdfx"]}}}GTS_PDFXVersion',
|
|
61
|
+
f'{{{NS["pdfx"]}}}GTS_PDFXConformance',
|
|
62
|
+
):
|
|
63
|
+
subelement = SubElement(element, key)
|
|
64
|
+
subelement.text = conformance
|
|
65
|
+
subelement = SubElement(element, f'{{{NS["pdf"]}}}Trapped')
|
|
66
|
+
subelement.text = 'False'
|
|
67
|
+
if version >= 4:
|
|
68
|
+
# TODO: these values could be useful instead of using random values.
|
|
69
|
+
assert metadata.modified
|
|
70
|
+
subelement = SubElement(element, f'{{{NS["xmp"]}}}MetadataDate')
|
|
71
|
+
subelement.text = metadata.modified
|
|
72
|
+
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}DocumentID')
|
|
73
|
+
subelement.text = f'xmp.did:{uuid4()}'
|
|
74
|
+
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}RenditionClass')
|
|
75
|
+
subelement.text = 'proof:pdf'
|
|
76
|
+
subelement = SubElement(element, f'{{{NS["xmpMM"]}}}VersionID')
|
|
77
|
+
subelement.text = '1'
|
|
78
|
+
else:
|
|
79
|
+
element.attrib[f'{{{NS[namespace]}}}conformance'] = conformance
|
|
80
|
+
if variant == 'a' and version == 4:
|
|
81
|
+
subelement = SubElement(element, f'{{{NS["pdfaid"]}}}rev')
|
|
82
|
+
subelement.text = '2020'
|
|
52
83
|
|
|
53
84
|
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
|
54
85
|
element.attrib[f'{{{NS["rdf"]}}}about'] = ''
|
|
@@ -76,6 +107,7 @@ def generate_rdf_metadata(metadata, variant, version, conformance):
|
|
|
76
107
|
element = SubElement(element, f'{{{NS["dc"]}}}subject')
|
|
77
108
|
element = SubElement(element, f'{{{NS["rdf"]}}}Bag')
|
|
78
109
|
element = SubElement(element, f'{{{NS["rdf"]}}}li')
|
|
110
|
+
element.attrib['xml:lang'] = 'x-default'
|
|
79
111
|
element.text = metadata.description
|
|
80
112
|
if metadata.keywords:
|
|
81
113
|
element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
|
weasyprint/pdf/pdfa.py
CHANGED
|
@@ -67,6 +67,10 @@ def pdfa(pdf, metadata, document, page_streams, attachments, compress,
|
|
|
67
67
|
compress = False
|
|
68
68
|
add_metadata(pdf, metadata, 'a', version, variant, compress)
|
|
69
69
|
|
|
70
|
+
# Remove document information.
|
|
71
|
+
if version >= 4:
|
|
72
|
+
pdf.info.clear()
|
|
73
|
+
|
|
70
74
|
|
|
71
75
|
VARIANTS = {
|
|
72
76
|
'pdf/a-1b': (
|
|
@@ -78,9 +82,6 @@ VARIANTS = {
|
|
|
78
82
|
'pdf/a-3b': (
|
|
79
83
|
partial(pdfa, version=3, variant='B'),
|
|
80
84
|
{'version': '1.7', 'identifier': True, 'srgb': True}),
|
|
81
|
-
'pdf/a-4b': (
|
|
82
|
-
partial(pdfa, version=4, variant='B'),
|
|
83
|
-
{'version': '2.0', 'identifier': True, 'srgb': True}),
|
|
84
85
|
'pdf/a-2u': (
|
|
85
86
|
partial(pdfa, version=2, variant='U'),
|
|
86
87
|
{'version': '1.7', 'identifier': True, 'srgb': True}),
|
|
@@ -90,4 +91,19 @@ VARIANTS = {
|
|
|
90
91
|
'pdf/a-4u': (
|
|
91
92
|
partial(pdfa, version=4, variant='U'),
|
|
92
93
|
{'version': '2.0', 'identifier': True, 'srgb': True}),
|
|
94
|
+
'pdf/a-1a': (
|
|
95
|
+
partial(pdfa, version=1, variant='A'),
|
|
96
|
+
{'version': '1.4', 'identifier': True, 'srgb': True, 'pdf_tags': True}),
|
|
97
|
+
'pdf/a-2a': (
|
|
98
|
+
partial(pdfa, version=2, variant='A'),
|
|
99
|
+
{'version': '1.7', 'identifier': True, 'srgb': True, 'pdf_tags': True}),
|
|
100
|
+
'pdf/a-3a': (
|
|
101
|
+
partial(pdfa, version=3, variant='A'),
|
|
102
|
+
{'version': '1.7', 'identifier': True, 'srgb': True, 'pdf_tags': True}),
|
|
103
|
+
'pdf/a-4e': (
|
|
104
|
+
partial(pdfa, version=4, variant='E'),
|
|
105
|
+
{'version': '2.0', 'identifier': True, 'srgb': True}),
|
|
106
|
+
'pdf/a-4f': (
|
|
107
|
+
partial(pdfa, version=4, variant='F'),
|
|
108
|
+
{'version': '2.0', 'identifier': True, 'srgb': True}),
|
|
93
109
|
}
|