weasyprint 65.1__py3-none-any.whl → 67.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. weasyprint/__init__.py +17 -7
  2. weasyprint/__main__.py +21 -10
  3. weasyprint/anchors.py +4 -4
  4. weasyprint/css/__init__.py +732 -67
  5. weasyprint/css/computed_values.py +65 -170
  6. weasyprint/css/counters.py +1 -1
  7. weasyprint/css/functions.py +206 -0
  8. weasyprint/css/html5_ua.css +3 -7
  9. weasyprint/css/html5_ua_form.css +2 -2
  10. weasyprint/css/media_queries.py +3 -1
  11. weasyprint/css/properties.py +6 -2
  12. weasyprint/css/{utils.py → tokens.py} +306 -397
  13. weasyprint/css/units.py +91 -0
  14. weasyprint/css/validation/__init__.py +1 -1
  15. weasyprint/css/validation/descriptors.py +47 -19
  16. weasyprint/css/validation/expanders.py +7 -8
  17. weasyprint/css/validation/properties.py +341 -357
  18. weasyprint/document.py +20 -19
  19. weasyprint/draw/__init__.py +56 -63
  20. weasyprint/draw/border.py +121 -69
  21. weasyprint/draw/color.py +1 -1
  22. weasyprint/draw/text.py +60 -41
  23. weasyprint/formatting_structure/boxes.py +24 -5
  24. weasyprint/formatting_structure/build.py +33 -45
  25. weasyprint/images.py +76 -62
  26. weasyprint/layout/__init__.py +32 -26
  27. weasyprint/layout/absolute.py +7 -6
  28. weasyprint/layout/background.py +7 -7
  29. weasyprint/layout/block.py +195 -152
  30. weasyprint/layout/column.py +19 -24
  31. weasyprint/layout/flex.py +54 -26
  32. weasyprint/layout/float.py +12 -7
  33. weasyprint/layout/grid.py +284 -90
  34. weasyprint/layout/inline.py +121 -68
  35. weasyprint/layout/page.py +45 -12
  36. weasyprint/layout/percent.py +14 -10
  37. weasyprint/layout/preferred.py +105 -63
  38. weasyprint/layout/replaced.py +9 -6
  39. weasyprint/layout/table.py +16 -9
  40. weasyprint/pdf/__init__.py +58 -18
  41. weasyprint/pdf/anchors.py +3 -4
  42. weasyprint/pdf/fonts.py +126 -69
  43. weasyprint/pdf/metadata.py +36 -4
  44. weasyprint/pdf/pdfa.py +19 -3
  45. weasyprint/pdf/pdfua.py +7 -115
  46. weasyprint/pdf/pdfx.py +83 -0
  47. weasyprint/pdf/stream.py +57 -49
  48. weasyprint/pdf/tags.py +307 -0
  49. weasyprint/stacking.py +14 -15
  50. weasyprint/svg/__init__.py +59 -32
  51. weasyprint/svg/bounding_box.py +4 -2
  52. weasyprint/svg/defs.py +4 -9
  53. weasyprint/svg/images.py +11 -3
  54. weasyprint/svg/text.py +11 -2
  55. weasyprint/svg/utils.py +15 -8
  56. weasyprint/text/constants.py +1 -1
  57. weasyprint/text/ffi.py +4 -3
  58. weasyprint/text/fonts.py +13 -5
  59. weasyprint/text/line_break.py +146 -43
  60. weasyprint/urls.py +41 -13
  61. {weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/METADATA +5 -6
  62. weasyprint-67.0.dist-info/RECORD +77 -0
  63. weasyprint/draw/stack.py +0 -13
  64. weasyprint-65.1.dist-info/RECORD +0 -74
  65. {weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/WHEEL +0 -0
  66. {weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/entry_points.txt +0 -0
  67. {weasyprint-65.1.dist-info → weasyprint-67.0.dist-info}/licenses/LICENSE +0 -0
weasyprint/pdf/anchors.py CHANGED
@@ -16,7 +16,7 @@ from ..text.fonts import get_font_description
16
16
  from ..urls import URLFetchingError
17
17
 
18
18
 
19
- def add_links(links_and_anchors, matrix, pdf, page, names, mark):
19
+ def add_links(links_and_anchors, matrix, pdf, page, names, tags):
20
20
  """Include hyperlinks in given PDF page."""
21
21
  links, anchors = links_and_anchors
22
22
 
@@ -30,7 +30,7 @@ def add_links(links_and_anchors, matrix, pdf, page, names, mark):
30
30
  'Rect': pydyf.Array([x1, y1, x2, y2]),
31
31
  'BS': pydyf.Dictionary({'W': 0}),
32
32
  })
33
- if mark:
33
+ if tags is not None:
34
34
  box.link_annotation['Contents'] = pydyf.String(link_target)
35
35
  if link_type == 'internal':
36
36
  box.link_annotation['Dest'] = pydyf.String(link_target)
@@ -233,7 +233,7 @@ def add_forms(forms, matrix, pdf, page, resources, stream, font_map):
233
233
  flags = 1 << (3 - 1) # HTML form format
234
234
  if form.attrib.get('method', '').lower() != 'post':
235
235
  flags += 1 << (4 - 1) # GET method
236
- fields = pydyf.Array((field.reference for field in forms[form].values()))
236
+ fields = pydyf.Array(field.reference for field in forms[form].values())
237
237
  field['FT'] = '/Btn'
238
238
  field['DA'] = pydyf.String(b' '.join(field_stream.stream))
239
239
  field['V'] = pydyf.String(form.attrib.get('value', ''))
@@ -276,7 +276,6 @@ def add_forms(forms, matrix, pdf, page, resources, stream, font_map):
276
276
  forms[form][input_name] = field
277
277
 
278
278
 
279
-
280
279
  def add_annotations(links, matrix, document, pdf, page, annot_files, compress):
281
280
  """Include annotations in PDF."""
282
281
  # TODO: splitting a link into multiple independent rectangular
weasyprint/pdf/fonts.py CHANGED
@@ -9,7 +9,7 @@ from math import ceil
9
9
  import pydyf
10
10
  from fontTools import subset
11
11
  from fontTools.ttLib import TTFont, TTLibError, ttFont
12
- from fontTools.varLib.mutator import instantiateVariableFont
12
+ from fontTools.varLib.instancer import instantiateVariableFont
13
13
 
14
14
  from ..logger import LOGGER, capture_logs
15
15
  from ..text.constants import PANGO_STRETCH_PERCENT
@@ -26,7 +26,8 @@ class Font:
26
26
 
27
27
  self.font_size = font_size
28
28
  self.style = pango.pango_font_description_get_style(description)
29
- self.family = ffi.string(pango.pango_font_description_get_family(description))
29
+ self.family = ffi.string(
30
+ pango.pango_font_description_get_family(description)).decode()
30
31
 
31
32
  self.variations = {}
32
33
  variations = pango.pango_font_description_get_variations(description)
@@ -97,10 +98,12 @@ class Font:
97
98
  self.upem = harfbuzz.hb_face_get_upem(self.hb_face)
98
99
  self.png = harfbuzz.hb_ot_color_has_png(self.hb_face)
99
100
  self.svg = harfbuzz.hb_ot_color_has_svg(self.hb_face)
101
+ self.glyph_count = harfbuzz.hb_face_get_glyph_count(self.hb_face)
100
102
  self.stemv = 80
101
103
  self.stemh = 80
102
104
  self.widths = {}
103
- self.cmap = {}
105
+ self.to_unicode = {}
106
+ self.missing = {}
104
107
  self.used_in_forms = False
105
108
 
106
109
  # Set font flags.
@@ -110,42 +113,50 @@ class Font:
110
113
  if b'Serif' in name.split(b' '):
111
114
  self.flags += 2 ** (2 - 1) # Serif
112
115
 
113
- def clean(self, cmap, hinting):
116
+ def get_unused_glyph_id(self, codepoint):
117
+ """Get a glyph id that’s not used in the font, for given Unicode codepoint."""
118
+ if codepoint not in self.missing:
119
+ next_unused_glyph_id = self.glyph_count + len(self.missing)
120
+ if next_unused_glyph_id > 2 ** 16 - 1:
121
+ LOGGER.warning(
122
+ f'Too many glyphs missing from "{self.family}", '
123
+ 'expect text selection problems')
124
+ next_unused_glyph_id = 2 ** 16 - 1
125
+ self.missing[codepoint] = next_unused_glyph_id
126
+ return self.missing[codepoint]
127
+
128
+ def clean(self, to_unicode, hinting):
114
129
  """Remove useless data from font."""
115
130
 
116
131
  # Subset font.
117
- self.subset(cmap, hinting)
132
+ self.subset(to_unicode, hinting)
118
133
 
119
134
  # Transform variable into static font.
120
135
  if 'fvar' in self.tables:
121
136
  full_font = io.BytesIO(self.file_content)
122
137
  ttfont = TTFont(full_font, fontNumber=self.index)
123
- if 'wght' not in self.variations:
138
+ axes = {axis.axisTag: axis for axis in ttfont['fvar'].axes}
139
+ if 'wght' in axes and 'wght' not in self.variations:
124
140
  self.variations['wght'] = self.weight
125
- if 'opsz' not in self.variations:
141
+ if 'opsz' in axes and 'opsz' not in self.variations:
126
142
  self.variations['opsz'] = self.font_size
127
- if 'slnt' not in self.variations:
143
+ if 'slnt' in axes and 'slnt' not in self.variations:
128
144
  slnt = 0
129
145
  if self.style == 1:
130
- for axe in ttfont['fvar'].axes:
131
- if axe.axisTag == 'slnt':
132
- if axe.maxValue == 0:
133
- slnt = axe.minValue
134
- else:
135
- slnt = axe.maxValue
136
- break
146
+ if axes['slnt'].maxValue == 0:
147
+ slnt = axes['slnt'].minValue
148
+ else:
149
+ slnt = axes['slnt'].maxValue
137
150
  self.variations['slnt'] = slnt
138
- if 'ital' not in self.variations:
151
+ if 'ital' in axes and 'ital' not in self.variations:
139
152
  self.variations['ital'] = int(self.style == 2)
140
153
  partial_font = io.BytesIO()
141
154
  try:
142
- ttfont = instantiateVariableFont(ttfont, self.variations)
143
- for key, (advance, bearing) in ttfont['hmtx'].metrics.items():
144
- if advance < 0:
145
- ttfont['hmtx'].metrics[key] = (0, bearing)
155
+ ttfont = instantiateVariableFont(ttfont, self.variations, static=True)
146
156
  ttfont.save(partial_font)
147
- except Exception:
148
- LOGGER.warning('Unable to mutate variable font')
157
+ except Exception as exception:
158
+ LOGGER.warning(f'Unable to instantiate "{self.family}" variable font')
159
+ LOGGER.debug('Original exception:', exc_info=exception)
149
160
  else:
150
161
  self.file_content = partial_font.getvalue()
151
162
 
@@ -171,25 +182,26 @@ class Font:
171
182
  output_font = io.BytesIO()
172
183
  ttfont.save(output_font)
173
184
  self.file_content = output_font.getvalue()
174
- except TTLibError:
175
- LOGGER.warning('Unable to save emoji font')
185
+ except TTLibError as exception:
186
+ LOGGER.warning(f'Unable to save emoji font "{self.family}"')
187
+ LOGGER.debug('Original exception:', exc_info=exception)
176
188
 
177
189
  @property
178
190
  def type(self):
179
191
  return 'otf' if self.file_content[:4] == b'OTTO' else 'ttf'
180
192
 
181
- def subset(self, cmap, hinting):
193
+ def subset(self, to_unicode, hinting):
182
194
  """Remove unused glyphs and tables from font."""
183
- if not cmap:
195
+ if not to_unicode:
184
196
  return
185
197
 
186
198
  if harfbuzz_subset and harfbuzz.hb_version_atleast(4, 1, 0):
187
199
  # 4.1.0 is required for hb_set_add_sorted_array.
188
- self._harfbuzz_subset(cmap, hinting)
200
+ self._harfbuzz_subset(to_unicode, hinting)
189
201
  else:
190
- self._fonttools_subset(cmap, hinting)
202
+ self._fonttools_subset(to_unicode, hinting)
191
203
 
192
- def _harfbuzz_subset(self, cmap, hinting):
204
+ def _harfbuzz_subset(self, to_unicode, hinting):
193
205
  """Subset font using Harfbuzz."""
194
206
  hb_subset = ffi.gc(
195
207
  harfbuzz_subset.hb_subset_input_create_or_fail(),
@@ -197,14 +209,16 @@ class Font:
197
209
 
198
210
  # Only keep used glyphs.
199
211
  gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset)
200
- gid_array = ffi.new(f'hb_codepoint_t[{len(cmap)}]', sorted(cmap))
201
- harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(cmap))
212
+ gid_array = ffi.new(f'hb_codepoint_t[{len(to_unicode)}]', sorted(to_unicode))
213
+ harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(to_unicode))
202
214
 
203
215
  # Set flags.
204
216
  flags = (
205
217
  harfbuzz_subset.HB_SUBSET_FLAGS_RETAIN_GIDS |
206
218
  harfbuzz_subset.HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED |
207
219
  harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE)
220
+ if self.missing:
221
+ flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NOTDEF_OUTLINE
208
222
  harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags)
209
223
 
210
224
  # Drop useless tables.
@@ -223,7 +237,7 @@ class Font:
223
237
 
224
238
  # Drop empty glyphs after last one used.
225
239
  gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset)
226
- keep = tuple(range(max(cmap) + 1))
240
+ keep = tuple(range(max(to_unicode) + 1))
227
241
  gid_array = ffi.new(f'hb_codepoint_t[{len(keep)}]', keep)
228
242
  harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(keep))
229
243
 
@@ -233,6 +247,8 @@ class Font:
233
247
  harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE)
234
248
  if not hinting:
235
249
  flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NO_HINTING
250
+ if self.missing:
251
+ flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NOTDEF_OUTLINE
236
252
  harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags)
237
253
 
238
254
  # Subset font.
@@ -247,19 +263,19 @@ class Font:
247
263
  self.file_content = file_content
248
264
  return
249
265
 
250
- LOGGER.warning('Unable to subset font with Harfbuzz')
266
+ LOGGER.warning(f'Unable to subset "{self.family}" with HarfBuzz')
251
267
 
252
- def _fonttools_subset(self, cmap, hinting):
268
+ def _fonttools_subset(self, to_unicode, hinting):
253
269
  """Subset font using Fonttools."""
254
270
  full_font = io.BytesIO(self.file_content)
255
271
 
256
272
  # Set subset options.
257
273
  options = subset.Options(
258
274
  retain_gids=True, passthrough_tables=True, ignore_missing_glyphs=True,
259
- hinting=hinting, desubroutinize=True)
275
+ hinting=hinting, desubroutinize=True, notdef_outline=bool(self.missing))
260
276
  options.drop_tables += ['GSUB', 'GPOS', 'SVG']
261
277
  subsetter = subset.Subsetter(options)
262
- subsetter.populate(gids=cmap)
278
+ subsetter.populate(gids=to_unicode)
263
279
 
264
280
  # Subset font.
265
281
  try:
@@ -268,10 +284,11 @@ class Font:
268
284
  subsetter.subset(ttfont)
269
285
  for log in logs:
270
286
  LOGGER.warning(
271
- 'fontTools warning when subsetting "%s": %s',
272
- self.family.decode(), log)
273
- except TTLibError:
274
- LOGGER.warning('Unable to subset font with fontTools')
287
+ 'fontTools warning when subsetting '
288
+ f'"{self.family}": {log}')
289
+ except TTLibError as exception:
290
+ LOGGER.warning(f'Unable to subset "{self.family}" with fontTools')
291
+ LOGGER.debug('Original exception:', exc_info=exception)
275
292
  else:
276
293
  optimized_font = io.BytesIO()
277
294
  ttfont.save(optimized_font)
@@ -292,11 +309,11 @@ def build_fonts_dictionary(pdf, fonts, compress, subset, options):
292
309
  continue
293
310
 
294
311
  # Clean font, optimize and handle emojis.
295
- cmap = {}
312
+ to_unicode = {}
296
313
  if subset and not font.used_in_forms:
297
314
  for file_font in file_fonts:
298
- cmap = {**cmap, **file_font.cmap}
299
- font.clean(cmap, options['hinting'])
315
+ to_unicode = {**to_unicode, **file_font.to_unicode}
316
+ font.clean(to_unicode, options['hinting'])
300
317
 
301
318
  # Include font.
302
319
  if font.type == 'otf':
@@ -311,20 +328,20 @@ def build_fonts_dictionary(pdf, fonts, compress, subset, options):
311
328
  if subset and not font.used_in_forms:
312
329
  # Only store widths and map for used glyphs
313
330
  font_widths = font.widths
314
- cmap = font.cmap
331
+ to_unicode = font.to_unicode
315
332
  else:
316
333
  # Store width and Unicode map for all glyphs
317
334
  full_font = io.BytesIO(font.file_content)
318
335
  ttfont = TTFont(full_font, fontNumber=font.index)
319
- font_widths, cmap = {}, {}
336
+ font_widths, to_unicode = {}, {}
320
337
  for i, glyph in enumerate(ttfont.getGlyphSet().values()):
321
338
  font_widths[i] = glyph.width * 1000 / font.upem
322
339
  for letter, key in ttfont.getBestCmap().items():
323
- glyph = ttfont.getGlyphID(key)
324
- if glyph not in cmap:
325
- cmap[glyph] = chr(letter)
340
+ glyph_id = ttfont.getGlyphID(key)
341
+ if glyph_id not in to_unicode:
342
+ to_unicode[glyph_id] = chr(letter)
326
343
 
327
- to_unicode = pydyf.Stream([
344
+ to_unicode_object = pydyf.Stream([
328
345
  b'/CIDInit /ProcSet findresource begin',
329
346
  b'12 dict begin',
330
347
  b'begincmap',
@@ -338,28 +355,29 @@ def build_fonts_dictionary(pdf, fonts, compress, subset, options):
338
355
  b'1 begincodespacerange',
339
356
  b'<0000> <ffff>',
340
357
  b'endcodespacerange'], compress=compress)
341
- cmap_length = len(cmap)
342
- cmap_items = tuple(cmap.items())
343
- for i in range(ceil(cmap_length / 100)):
344
- batch_length = min(100, cmap_length - i * 100)
345
- to_unicode.stream.append(f'{batch_length} beginbfchar'.encode())
346
- for glyph, text in cmap_items[i*100:(i+1)*100]:
358
+ to_unicode_stream = to_unicode_object.stream
359
+ to_unicode_length = len(to_unicode)
360
+ to_unicode_items = tuple(to_unicode.items())
361
+ for i in range(ceil(to_unicode_length / 100)):
362
+ batch_length = min(100, to_unicode_length - i * 100)
363
+ to_unicode_stream.append(f'{batch_length} beginbfchar'.encode())
364
+ for glyph, text in to_unicode_items[i*100:(i+1)*100]:
347
365
  unicode_codepoints = ''.join(
348
366
  f'{letter.encode("utf-16-be").hex()}' for letter in text)
349
- to_unicode.stream.append(
367
+ to_unicode_stream.append(
350
368
  f'<{glyph:04x}> <{unicode_codepoints}>'.encode())
351
- to_unicode.stream.append(b'endbfchar')
352
- to_unicode.stream.extend([
369
+ to_unicode_stream.append(b'endbfchar')
370
+ to_unicode_stream.extend([
353
371
  b'endcmap',
354
372
  b'CMapName currentdict /CMap defineresource pop',
355
373
  b'end',
356
374
  b'end'])
357
- pdf.add_object(to_unicode)
375
+ pdf.add_object(to_unicode_object)
358
376
  font_dictionary = pydyf.Dictionary({
359
377
  'Type': '/Font',
360
378
  'Subtype': f'/Type{3 if font.bitmap else 0}',
361
379
  'BaseFont': font.name,
362
- 'ToUnicode': to_unicode.reference,
380
+ 'ToUnicode': to_unicode_object.reference,
363
381
  })
364
382
 
365
383
  if font.bitmap:
@@ -380,7 +398,7 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress,
380
398
  font_dictionary['FontBBox'] = pydyf.Array([0, 0, 1, 1])
381
399
  font_dictionary['FontMatrix'] = pydyf.Array([1, 0, 0, 1, 0, 0])
382
400
  if subset:
383
- chars = tuple(sorted(font.cmap))
401
+ chars = tuple(sorted(font.to_unicode))
384
402
  else:
385
403
  chars = tuple(range(256))
386
404
  first, last = chars[0], chars[-1]
@@ -419,7 +437,8 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress,
419
437
  bearing_y = subtable.metrics.horiBearingY
420
438
  break
421
439
  else:
422
- LOGGER.warning(f'Unknown bitmap metrics for glyph: {glyph_id}')
440
+ LOGGER.warning(
441
+ f'Unknown bitmap metrics in "{font.family}" for glyph: {glyph_id}')
423
442
  continue
424
443
  else:
425
444
  data_start = 5 if glyph_format in (1, 2, 8) else 8
@@ -466,11 +485,12 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress,
466
485
  y = int.from_bytes(data[index+3:index+4], 'big', signed=True)
467
486
  subglyphs.append({'id': subglyph_id, 'x': x, 'y': y})
468
487
  else: # pragma: no cover
469
- LOGGER.warning(f'Unsupported bitmap glyph format: {glyph_format}')
488
+ LOGGER.warning(
489
+ f'Unsupported bitmap glyph format in "{font.family}": {glyph_format}')
470
490
  glyph_info['bitmap'] = bytes(height * stride)
471
491
 
472
492
  for glyph_id, glyph_info in glyphs_info.items():
473
- # Don’t store glyph not in cmap.
493
+ # Don’t store glyph not in to_unicode.
474
494
  if glyph_id not in chars:
475
495
  continue
476
496
 
@@ -488,12 +508,14 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress,
488
508
  sub_y = subglyph['y']
489
509
  sub_id = subglyph['id']
490
510
  if sub_id not in glyphs_info:
491
- LOGGER.warning(f'Unknown subglyph: {sub_id}')
511
+ LOGGER.warning(f'Unknown subglyph in "{font.family}": {sub_id}')
492
512
  continue
493
513
  subglyph = glyphs_info[sub_id]
494
514
  if subglyph['bitmap'] is None:
495
515
  # TODO: Support subglyph in subglyph.
496
- LOGGER.warning(f'Unsupported subglyph in subglyph: {sub_id}')
516
+ LOGGER.warning(
517
+ 'Unsupported subglyph in subglyph in '
518
+ f'"{font.family}": {sub_id}')
497
519
  continue
498
520
  for row_y in range(subglyph['height']):
499
521
  row_slice = slice(
@@ -569,8 +591,6 @@ def _build_vector_font_dictionary(font_dictionary, pdf, font, widths, compress,
569
591
  compress=compress)
570
592
  pdf.add_object(stream)
571
593
  font_descriptor['CIDSet'] = stream.reference
572
- if font.type == 'otf':
573
- font_descriptor['Subtype'] = '/OpenType'
574
594
  pdf.add_object(font_descriptor)
575
595
 
576
596
  pdf_widths = pydyf.Array()
@@ -595,5 +615,42 @@ def _build_vector_font_dictionary(font_dictionary, pdf, font, widths, compress,
595
615
  'FontDescriptor': font_descriptor.reference,
596
616
  })
597
617
  pdf.add_object(subfont_dictionary)
598
- font_dictionary['Encoding'] = '/Identity-H'
618
+ if font.missing:
619
+ # Add CMap that doesn’t include missing glyphs, so that they can be replaced by
620
+ # .notdef.
621
+ encoding = pydyf.Stream([
622
+ b'/CIDInit /ProcSet findresource begin',
623
+ b'12 dict begin',
624
+ b'begincmap',
625
+ b'/CIDSystemInfo',
626
+ b'3 dict dup begin',
627
+ b'/Registry (WP) def',
628
+ b'/Ordering (Encod) def',
629
+ b'/Supplement 0 def',
630
+ b'end def',
631
+ b'/CMapName /WP-Encod-0 def',
632
+ b'/CMapType 1 def',
633
+ b'1 begincodespacerange',
634
+ b'<0000> <ffff>',
635
+ b'endcodespacerange',
636
+ ], compress=compress)
637
+ available = tuple(font.to_unicode)
638
+ available_length = len(available)
639
+ for i in range(ceil(available_length / 100)):
640
+ batch_length = min(100, available_length - i * 100)
641
+ encoding.stream.append(f'{batch_length} begincidchar'.encode())
642
+ for glyph_id in available[i*100:(i+1)*100]:
643
+ font_glyph_id = 0 if glyph_id in font.missing.values() else glyph_id
644
+ encoding.stream.append(f'<{glyph_id:04x}> {font_glyph_id}'.encode())
645
+ encoding.stream.append(b'endcidchar')
646
+ encoding.stream.extend([
647
+ b'endcmap',
648
+ b'CMapName currentdict /CMap defineresource pop',
649
+ b'end',
650
+ b'end'])
651
+ pdf.add_object(encoding)
652
+ font_dictionary['Encoding'] = encoding.reference
653
+ else:
654
+ # No missing glyph in this font, use the identity mapping to map all glyphs.
655
+ font_dictionary['Encoding'] = '/Identity-H'
599
656
  font_dictionary['DescendantFonts'] = pydyf.Array([subfont_dictionary.reference])
weasyprint/pdf/metadata.py CHANGED
@@ -1,5 +1,6 @@
1
1
  """PDF metadata stream generation."""
2
2
 
3
+ from uuid import uuid4
3
4
  from xml.etree.ElementTree import Element, SubElement, register_namespace, tostring
4
5
 
5
6
  import pydyf
@@ -11,9 +12,12 @@ NS = {
11
12
  'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
12
13
  'dc': 'http://purl.org/dc/elements/1.1/',
13
14
  'xmp': 'http://ns.adobe.com/xap/1.0/',
15
+ 'xmpMM': 'http://ns.adobe.com/xap/1.0/mm/',
14
16
  'pdf': 'http://ns.adobe.com/pdf/1.3/',
15
17
  'pdfaid': 'http://www.aiim.org/pdfa/ns/id/',
16
18
  'pdfuaid': 'http://www.aiim.org/pdfua/ns/id/',
19
+ 'pdfxid': 'http://www.npes.org/pdfx/ns/id/',
20
+ 'pdfx': 'http://ns.adobe.com/pdfx/1.3/',
17
21
  }
18
22
  for key, value in NS.items():
19
23
  register_namespace(key, value)
@@ -44,11 +48,38 @@ def generate_rdf_metadata(metadata, variant, version, conformance):
44
48
  namespace = f'pdf{variant}id'
45
49
  rdf = Element(f'{{{NS["rdf"]}}}RDF')
46
50
 
47
- element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
48
- element.attrib[f'{{{NS["rdf"]}}}about'] = ''
49
- element.attrib[f'{{{NS[namespace]}}}part'] = str(version)
51
+ if version:
52
+ element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
53
+ element.attrib[f'{{{NS["rdf"]}}}about'] = ''
54
+ element.attrib[f'{{{NS[namespace]}}}part'] = str(version)
50
55
  if conformance:
51
- element.attrib[f'{{{NS[namespace]}}}conformance'] = conformance
56
+ assert version
57
+ if variant == 'x':
58
+ for key in (
59
+ f'{{{NS["pdfxid"]}}}GTS_PDFXVersion',
60
+ f'{{{NS["pdfx"]}}}GTS_PDFXVersion',
61
+ f'{{{NS["pdfx"]}}}GTS_PDFXConformance',
62
+ ):
63
+ subelement = SubElement(element, key)
64
+ subelement.text = conformance
65
+ subelement = SubElement(element, f'{{{NS["pdf"]}}}Trapped')
66
+ subelement.text = 'False'
67
+ if version >= 4:
68
+ # TODO: these values could be useful instead of using random values.
69
+ assert metadata.modified
70
+ subelement = SubElement(element, f'{{{NS["xmp"]}}}MetadataDate')
71
+ subelement.text = metadata.modified
72
+ subelement = SubElement(element, f'{{{NS["xmpMM"]}}}DocumentID')
73
+ subelement.text = f'xmp.did:{uuid4()}'
74
+ subelement = SubElement(element, f'{{{NS["xmpMM"]}}}RenditionClass')
75
+ subelement.text = 'proof:pdf'
76
+ subelement = SubElement(element, f'{{{NS["xmpMM"]}}}VersionID')
77
+ subelement.text = '1'
78
+ else:
79
+ element.attrib[f'{{{NS[namespace]}}}conformance'] = conformance
80
+ if variant == 'a' and version == 4:
81
+ subelement = SubElement(element, f'{{{NS["pdfaid"]}}}rev')
82
+ subelement.text = '2020'
52
83
 
53
84
  element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
54
85
  element.attrib[f'{{{NS["rdf"]}}}about'] = ''
@@ -76,6 +107,7 @@ def generate_rdf_metadata(metadata, variant, version, conformance):
76
107
  element = SubElement(element, f'{{{NS["dc"]}}}subject')
77
108
  element = SubElement(element, f'{{{NS["rdf"]}}}Bag')
78
109
  element = SubElement(element, f'{{{NS["rdf"]}}}li')
110
+ element.attrib['xml:lang'] = 'x-default'
79
111
  element.text = metadata.description
80
112
  if metadata.keywords:
81
113
  element = SubElement(rdf, f'{{{NS["rdf"]}}}Description')
weasyprint/pdf/pdfa.py CHANGED
@@ -67,6 +67,10 @@ def pdfa(pdf, metadata, document, page_streams, attachments, compress,
67
67
  compress = False
68
68
  add_metadata(pdf, metadata, 'a', version, variant, compress)
69
69
 
70
+ # Remove document information.
71
+ if version >= 4:
72
+ pdf.info.clear()
73
+
70
74
 
71
75
  VARIANTS = {
72
76
  'pdf/a-1b': (
@@ -78,9 +82,6 @@ VARIANTS = {
78
82
  'pdf/a-3b': (
79
83
  partial(pdfa, version=3, variant='B'),
80
84
  {'version': '1.7', 'identifier': True, 'srgb': True}),
81
- 'pdf/a-4b': (
82
- partial(pdfa, version=4, variant='B'),
83
- {'version': '2.0', 'identifier': True, 'srgb': True}),
84
85
  'pdf/a-2u': (
85
86
  partial(pdfa, version=2, variant='U'),
86
87
  {'version': '1.7', 'identifier': True, 'srgb': True}),
@@ -90,4 +91,19 @@ VARIANTS = {
90
91
  'pdf/a-4u': (
91
92
  partial(pdfa, version=4, variant='U'),
92
93
  {'version': '2.0', 'identifier': True, 'srgb': True}),
94
+ 'pdf/a-1a': (
95
+ partial(pdfa, version=1, variant='A'),
96
+ {'version': '1.4', 'identifier': True, 'srgb': True, 'pdf_tags': True}),
97
+ 'pdf/a-2a': (
98
+ partial(pdfa, version=2, variant='A'),
99
+ {'version': '1.7', 'identifier': True, 'srgb': True, 'pdf_tags': True}),
100
+ 'pdf/a-3a': (
101
+ partial(pdfa, version=3, variant='A'),
102
+ {'version': '1.7', 'identifier': True, 'srgb': True, 'pdf_tags': True}),
103
+ 'pdf/a-4e': (
104
+ partial(pdfa, version=4, variant='E'),
105
+ {'version': '2.0', 'identifier': True, 'srgb': True}),
106
+ 'pdf/a-4f': (
107
+ partial(pdfa, version=4, variant='F'),
108
+ {'version': '2.0', 'identifier': True, 'srgb': True}),
93
109
  }