xmlppt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xmlppt/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .editor import PowerPointEditor
2
+
3
+ __all__ = ["PowerPointEditor"]
xmlppt/editor.py ADDED
@@ -0,0 +1,1085 @@
1
+ from zipfile import ZipFile
2
+ from lxml import etree
3
+ import re
4
+ import posixpath
5
+ from io import BytesIO
6
+ from openpyxl import load_workbook
7
+ import win32com.client
8
+ import os
9
+
10
+
11
# XML namespace URIs used when reading and building package parts.
P_NS = "http://schemas.openxmlformats.org/presentationml/2006/main"
C_NS = "http://schemas.openxmlformats.org/drawingml/2006/chart"
R_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
PR_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
CX_NS = "http://schemas.microsoft.com/office/drawing/2014/chartex"
A_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"
CT_NS = "http://schemas.openxmlformats.org/package/2006/content-types"
P14_NS = "http://schemas.microsoft.com/office/powerpoint/2010/main"

# Prefix -> URI map passed as ``namespaces=`` to the XPath queries in this module.
NS = dict(
    p=P_NS,
    c=C_NS,
    r=R_NS,
    pr=PR_NS,
    cx=CX_NS,
    a=A_NS,
    ct=CT_NS,
    p14=P14_NS,
)

# Relationship type URIs: each one is the officeDocument relationships
# namespace (R_NS) followed by a type-specific suffix.
REL_TYPE_SLIDE = f"{R_NS}/slide"
REL_TYPE_SLIDE_LAYOUT = f"{R_NS}/slideLayout"
REL_TYPE_CHART = f"{R_NS}/chart"
REL_TYPE_PACKAGE = f"{R_NS}/package"
REL_TYPE_IMAGE = f"{R_NS}/image"
REL_TYPE_HYPERLINK = f"{R_NS}/hyperlink"

# Content type registered for slide parts in [Content_Types].xml.
SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"

# Matches a slide part name and captures its number, e.g. "ppt/slides/slide3.xml" -> "3".
SLIDE_RE = re.compile(r"^ppt/slides/slide(\d+)\.xml$")
43
+
44
+
45
+ class PowerPointEditor:
46
+ """A lightweight editor for .pptx files that exposes operations
47
+ such as duplicating template slides, editing textboxes and charts,
48
+ and saving the modified package back to disk.
49
+ """
50
+
51
def __init__(self, input_pptx: str):
    """Load every part of the package at *input_pptx* into memory.

    Args:
        input_pptx: Path of the .pptx file to open.
    """
    # Remember the source path; save() derives its default output name from it.
    self.input_pptx = input_pptx
    # Mapping of archive member name -> raw bytes for the whole package.
    self.files = self._load_pptx_files(input_pptx)
    # Set to True by editing methods and reset by save().
    self._has_changes = False
55
+
56
+ # Basic package IO
57
+ @staticmethod
58
+ def _normalize_relationship_target(base_part: str, target: str) -> str:
59
+ base_dir = posixpath.dirname(base_part)
60
+ return posixpath.normpath(posixpath.join(base_dir, target))
61
+
62
+ def _load_pptx_files(self, pptx_path: str) -> dict[str, bytes]:
63
+ with ZipFile(pptx_path, "r") as archive:
64
+ return {name: archive.read(name) for name in archive.namelist()}
65
+
66
+ def _write_pptx_files(self, output_pptx: str) -> None:
67
+ with ZipFile(output_pptx, "w") as archive:
68
+ for name, data in self.files.items():
69
+ archive.writestr(name, data)
70
+
71
+ def _default_output_name(self, input_pptx: str) -> str:
72
+ base, ext = os.path.splitext(input_pptx)
73
+ return f"{base}_updated{ext}"
74
+
75
def save(self, output_pptx: str | None = None) -> str:
    """Write the package to disk and clear the pending-changes flag.

    Args:
        output_pptx: Destination path. When ``None``, the name produced by
            ``_default_output_name`` for the input file is used.

    Returns:
        The path that was written.
    """
    destination = (
        output_pptx
        if output_pptx is not None
        else self._default_output_name(self.input_pptx)
    )
    self._write_pptx_files(destination)
    self._has_changes = False
    return destination
82
+
83
+ # Package / part helpers
84
+ def _rels_part_for(self, part_name: str) -> str:
85
+ folder = posixpath.dirname(part_name)
86
+ filename = posixpath.basename(part_name)
87
+ return f"{folder}/_rels/{filename}.rels"
88
+
89
+ def _next_numbered_part_name(self, folder: str, prefix: str, ext: str) -> str:
90
+ pattern = re.compile(rf"^{re.escape(folder)}/{re.escape(prefix)}(\d+){re.escape(ext)}$")
91
+
92
+ used = []
93
+ for name in self.files:
94
+ match = pattern.match(name)
95
+ if match:
96
+ used.append(int(match.group(1)))
97
+
98
+ next_num = max(used, default=0) + 1
99
+ return f"{folder}/{prefix}{next_num}{ext}"
100
+
101
def _next_slide_part(self) -> tuple[int, str]:
    """Return ``(number, part_name)`` for the next unused slide part.

    The number is one more than the highest ``slideN.xml`` currently in the
    package, or 1 when the package has no slide parts.
    """
    highest = 0
    for part in self.files:
        hit = SLIDE_RE.match(part)
        if hit:
            highest = max(highest, int(hit.group(1)))

    upcoming = highest + 1
    return upcoming, f"ppt/slides/slide{upcoming}.xml"
110
+
111
def _next_relationship_id(self, rels_root) -> str:
    """Return an ``rIdN`` id one above the highest numeric id in *rels_root*.

    Only ids of the exact form ``rId<digits>`` are counted; with none
    present the result is ``rId1``.
    """
    highest = 0
    for relationship in rels_root.xpath("./pr:Relationship", namespaces=NS):
        numbered = re.match(r"rId(\d+)$", relationship.get("Id", ""))
        if numbered:
            highest = max(highest, int(numbered.group(1)))

    return f"rId{highest + 1}"
121
+
122
def _next_slide_id(self, presentation_root) -> str:
    """Return a new slide id, as a string, one above the highest existing one.

    Every ``p:sldId`` under *presentation_root* with an all-digit ``id`` is
    considered. With none present the result is ``"256"``.
    """
    numeric_ids = [
        int(raw)
        for node in presentation_root.xpath(".//p:sldId", namespaces=NS)
        if (raw := node.get("id")) and raw.isdigit()
    ]
    return str(max(numeric_ids, default=255) + 1)
131
+
132
def _add_content_type_override(self, part_name: str, content_type: str) -> None:
    """Register *content_type* for *part_name* in ``[Content_Types].xml``.

    An existing ``Override`` for the part is updated in place; otherwise a
    new one is appended. The part is rewritten in ``self.files``.

    Args:
        part_name: Part name without the leading slash.
        content_type: Value for the ``ContentType`` attribute.
    """
    content_types_part = "[Content_Types].xml"
    types_root = etree.fromstring(self.files[content_types_part])

    # Override entries carry absolute part names.
    absolute_name = f"/{part_name}"
    matches = types_root.xpath(
        f"./ct:Override[@PartName='{absolute_name}']",
        namespaces=NS,
    )

    if matches:
        entry = matches[0]
    else:
        entry = etree.SubElement(types_root, f"{{{CT_NS}}}Override")
        entry.set("PartName", absolute_name)
    entry.set("ContentType", content_type)

    self.files[content_types_part] = etree.tostring(
        types_root,
        xml_declaration=True,
        encoding="UTF-8",
        standalone="yes",
    )
156
+
157
+ # Section-aware slide insertion
158
def _find_insert_index_before_section(self, presentation_root, section_name: str) -> int | None:
    """Locate where a slide must go to land just before a named section.

    The section is matched by name, ignoring case and surrounding
    whitespace. The result is the index, within the main ``p:sldIdLst``,
    of the section's first slide.

    Raises:
        ValueError: If there are no sections, the section is missing or
            empty, its first slide has no id, ``p:sldIdLst`` is missing, or
            the slide id is not listed there.
    """
    wanted = section_name.strip().casefold()

    all_sections = presentation_root.xpath(".//p14:section", namespaces=NS)
    if not all_sections:
        raise ValueError("No PowerPoint sections found in presentation.xml. Create a section named 'template_slides' in PowerPoint first.")

    target_section = next(
        (
            candidate
            for candidate in all_sections
            if (candidate.get("name") or "").strip().casefold() == wanted
        ),
        None,
    )
    if target_section is None:
        raise ValueError(f"Section named '{section_name}' not found")

    members = target_section.xpath("./p14:sldIdLst/p14:sldId", namespaces=NS)
    if not members:
        raise ValueError(f"Section '{section_name}' contains no slides")

    anchor_id = members[0].get("id")
    if not anchor_id:
        raise ValueError(f"First slide in section '{section_name}' has no slide id")

    main_lists = presentation_root.xpath("./p:sldIdLst", namespaces=NS)
    if not main_lists:
        raise ValueError("No p:sldIdLst found in presentation.xml")

    # The section list and the main list share slide ids; find the anchor's position.
    for position, entry in enumerate(main_lists[0].xpath("./p:sldId", namespaces=NS)):
        if entry.get("id") == anchor_id:
            return position

    raise ValueError(f"Could not locate first slide of section '{section_name}' in p:sldIdLst")
194
+
195
def _add_slide_id_to_section_before(self, presentation_root, new_slide_id: str, before_section_name: str) -> None:
    """Append *new_slide_id* to the section preceding the named section.

    The named section is matched ignoring case and surrounding whitespace.
    If the presentation has no sections at all, nothing is changed.

    Raises:
        ValueError: If the named section does not exist or is the first
            section, so there is no preceding section to extend.
    """
    wanted = before_section_name.strip().casefold()

    all_sections = presentation_root.xpath(".//p14:section", namespaces=NS)
    if not all_sections:
        return

    target_index = next(
        (
            position
            for position, candidate in enumerate(all_sections)
            if (candidate.get("name") or "").strip().casefold() == wanted
        ),
        None,
    )
    if target_index is None:
        raise ValueError(f"Section named '{before_section_name}' not found")
    if target_index == 0:
        raise ValueError("Cannot insert before section because it is the first section.")

    host_section = all_sections[target_index - 1]

    # Reuse the host section's slide list, creating it when absent.
    existing_lists = host_section.xpath("./p14:sldIdLst", namespaces=NS)
    if existing_lists:
        host_list = existing_lists[0]
    else:
        host_list = etree.SubElement(host_section, f"{{{P14_NS}}}sldIdLst")

    entry = etree.SubElement(host_list, f"{{{P14_NS}}}sldId")
    entry.set("id", new_slide_id)
225
+
226
def _add_slide_to_presentation(self, slide_part: str, before_section_name: str | None = None) -> int:
    """Register *slide_part* in ``presentation.xml`` and its relationships.

    A slide relationship and a ``p:sldId`` entry are added. When
    *before_section_name* is given, the entry is inserted just before that
    section's first slide and the id is also appended to the preceding
    section; otherwise the entry is appended to the end of the slide list.

    Args:
        slide_part: Part name of the form ``ppt/slides/slideN.xml``.
        before_section_name: Optional section the slide is placed ahead of.

    Returns:
        The number N taken from the slide part name.

    Raises:
        ValueError: If *slide_part* is not a slide part name, or if the
            section lookup fails.
    """
    # Validate up front so a bad name cannot leave presentation.xml and its
    # rels half-updated.
    slide_num_match = SLIDE_RE.match(slide_part)
    if not slide_num_match:
        raise ValueError(f"Unexpected slide part name: {slide_part}")

    presentation_part = "ppt/presentation.xml"
    presentation_rels_part = "ppt/_rels/presentation.xml.rels"

    presentation_root = etree.fromstring(self.files[presentation_part])
    presentation_rels_root = etree.fromstring(self.files[presentation_rels_part])

    new_rid = self._next_relationship_id(presentation_rels_root)

    rel = etree.SubElement(presentation_rels_root, f"{{{PR_NS}}}Relationship")
    rel.set("Id", new_rid)
    rel.set("Type", REL_TYPE_SLIDE)
    # Relationship targets are relative to the folder of presentation.xml.
    rel.set("Target", posixpath.relpath(slide_part, "ppt"))

    sld_id_lst = presentation_root.xpath("./p:sldIdLst", namespaces=NS)
    if sld_id_lst:
        sld_id_lst_elem = sld_id_lst[0]
    else:
        # p:sldIdLst must follow the master-id lists and precede p:sldSz, so
        # place it right after the last master-id list instead of at the end.
        preceding_tags = {
            f"{{{P_NS}}}{local}"
            for local in ("sldMasterIdLst", "notesMasterIdLst", "handoutMasterIdLst")
        }
        position = 0
        for index, child in enumerate(presentation_root):
            if child.tag in preceding_tags:
                position = index + 1
        sld_id_lst_elem = etree.SubElement(presentation_root, f"{{{P_NS}}}sldIdLst")
        # insert() on an element already in the tree moves it.
        presentation_root.insert(position, sld_id_lst_elem)

    new_slide_id = self._next_slide_id(presentation_root)

    new_sld_id = etree.Element(f"{{{P_NS}}}sldId")
    new_sld_id.set("id", new_slide_id)
    new_sld_id.set(f"{{{R_NS}}}id", new_rid)

    insert_index = None
    if before_section_name:
        insert_index = self._find_insert_index_before_section(presentation_root=presentation_root, section_name=before_section_name)

    if insert_index is None:
        sld_id_lst_elem.append(new_sld_id)
    else:
        sld_id_lst_elem.insert(insert_index, new_sld_id)

    if before_section_name:
        self._add_slide_id_to_section_before(presentation_root=presentation_root, new_slide_id=new_slide_id, before_section_name=before_section_name)

    # Only persist once every step above has succeeded.
    self.files[presentation_part] = etree.tostring(presentation_root, xml_declaration=True, encoding="UTF-8", standalone="yes")
    self.files[presentation_rels_part] = etree.tostring(presentation_rels_root, xml_declaration=True, encoding="UTF-8", standalone="yes")

    return int(slide_num_match.group(1))
273
+
274
+ # Deep-copy slide dependencies
275
def _copy_related_part_if_needed(self, source_part: str, dest_part: str, rel_type: str) -> str | None:
    """Deep-copy a related part when its relationship type requires it.

    Chart parts and embedded packages (workbooks) are duplicated so the
    copy can be edited independently. Every other relationship type,
    including slide layouts and images, keeps pointing at the shared
    original.

    If the source part has an ``Override`` entry in ``[Content_Types].xml``,
    the same content type is registered for the copy. Without it a copied
    chart part would have no declared content type in the package.

    NOTE(review): only REL_TYPE_CHART and REL_TYPE_PACKAGE are copied here;
    any other relationship type a chart frame might use stays shared.

    Args:
        source_part: Part the original relationship points at.
        dest_part: Part that will own the rewritten relationship.
        rel_type: The relationship's ``Type`` URI.

    Returns:
        The new target relative to *dest_part*'s folder, or ``None`` when
        the relationship should be left unchanged.
    """
    if rel_type == REL_TYPE_CHART:
        new_part = self._next_numbered_part_name("ppt/charts", "chart", ".xml")
    elif rel_type == REL_TYPE_PACKAGE:
        new_part = self._next_numbered_part_name("ppt/embeddings", "Microsoft_Excel_Worksheet", ".xlsx")
    else:
        # Layouts, images and anything else are shared, not copied.
        return None

    self._deep_copy_part(source_part, new_part)

    # Mirror the source part's content-type override, if it has one.
    content_types_part = "[Content_Types].xml"
    if content_types_part in self.files:
        types_root = etree.fromstring(self.files[content_types_part])
        source_name = f"/{source_part}".casefold()
        for override in types_root.xpath("./ct:Override", namespaces=NS):
            if (override.get("PartName") or "").casefold() == source_name:
                source_content_type = override.get("ContentType")
                if source_content_type:
                    self._add_content_type_override(new_part, source_content_type)
                break

    dest_folder = posixpath.dirname(dest_part)
    return posixpath.relpath(new_part, dest_folder)
290
+
291
def _deep_copy_part(self, source_part: str, dest_part: str) -> None:
    """Copy *source_part* to *dest_part* together with its relationships.

    The part bytes are copied as-is. If the source has a ``.rels`` part, a
    rewritten copy is stored for the destination: internal relationships
    whose target exists in the package are offered to
    ``_copy_related_part_if_needed``, and retargeted when it returns a new
    target. External relationships are left untouched.

    Raises:
        FileNotFoundError: If *source_part* is not in the package.
    """
    if source_part not in self.files:
        raise FileNotFoundError(f"Cannot copy missing part: {source_part}")

    self.files[dest_part] = self.files[source_part]

    source_rels_part = self._rels_part_for(source_part)
    if source_rels_part not in self.files:
        return
    dest_rels_part = self._rels_part_for(dest_part)

    rels_root = etree.fromstring(self.files[source_rels_part])

    for relationship in rels_root.xpath("./pr:Relationship", namespaces=NS):
        if relationship.get("TargetMode") == "External":
            continue

        target = relationship.get("Target")
        rel_type = relationship.get("Type")
        if not (target and rel_type):
            continue

        related_part = self._normalize_relationship_target(source_part, target)
        if related_part not in self.files:
            continue

        replacement = self._copy_related_part_if_needed(source_part=related_part, dest_part=dest_part, rel_type=rel_type)
        if replacement:
            relationship.set("Target", replacement)

    self.files[dest_rels_part] = etree.tostring(rels_root, xml_declaration=True, encoding="UTF-8", standalone="yes")
327
+
328
+ # Template slide lookup and duplication
329
def find_slide_by_shape_name(self, shape_name: str) -> int:
    """Return the number of the first slide holding a shape named *shape_name*.

    Slides are searched in ascending part-number order. Both ``p:sp``
    shapes and ``p:graphicFrame`` elements are checked, and names are
    compared ignoring case and surrounding whitespace.

    Raises:
        ValueError: If no slide contains such a shape.
    """
    wanted = shape_name.strip().casefold()

    # (container XPath, XPath of its non-visual properties element)
    lookups = (
        (".//p:sp", "./p:nvSpPr/p:cNvPr"),
        (".//p:graphicFrame", "./p:nvGraphicFramePr/p:cNvPr"),
    )

    numbered_parts = sorted(
        ((int(hit.group(1)), part) for part in self.files if (hit := SLIDE_RE.match(part))),
        key=lambda pair: pair[0],
    )

    for slide_number, slide_part in numbered_parts:
        slide_root = etree.fromstring(self.files[slide_part])

        for container_path, props_path in lookups:
            for node in slide_root.xpath(container_path, namespaces=NS):
                props = node.xpath(props_path, namespaces=NS)
                if props and (props[0].get("name") or "").strip().casefold() == wanted:
                    return slide_number

    raise ValueError(f"Slide marker shape named '{shape_name}' not found")
357
+
358
def remove_shape_on_slide(self, slide_number: int, shape_name: str) -> None:
    """Remove the first shape named *shape_name* from the given slide.

    ``p:sp`` shapes are searched first, then ``p:graphicFrame`` elements;
    only the first match is removed. Names are compared ignoring case and
    surrounding whitespace. If nothing matches, the slide is left untouched
    and no error is raised.

    Raises:
        FileNotFoundError: If the slide part does not exist.
    """
    wanted = shape_name.strip().casefold()
    slide_part = f"ppt/slides/slide{slide_number}.xml"

    if slide_part not in self.files:
        raise FileNotFoundError(f"Slide not found: {slide_part}")

    slide_root = etree.fromstring(self.files[slide_part])

    # (container XPath, XPath of its non-visual properties element)
    lookups = (
        (".//p:sp", "./p:nvSpPr/p:cNvPr"),
        (".//p:graphicFrame", "./p:nvGraphicFramePr/p:cNvPr"),
    )

    for container_path, props_path in lookups:
        for node in slide_root.xpath(container_path, namespaces=NS):
            props = node.xpath(props_path, namespaces=NS)
            if not props:
                continue
            if (props[0].get("name") or "").strip().casefold() != wanted:
                continue

            # First match wins: drop it, persist the slide, and stop.
            node.getparent().remove(node)
            self.files[slide_part] = etree.tostring(slide_root, xml_declaration=True, encoding="UTF-8", standalone="yes")
            self._has_changes = True
            return
394
+
395
def duplicate_slide(self, template_slide_number: int, before_section_name: str | None = None) -> int:
    """Copy a slide into a new slide part and register it in the deck.

    Args:
        template_slide_number: Part number N of the ``slideN.xml`` to copy.
        before_section_name: Optional section the copy is placed ahead of.

    Returns:
        The part number of the new slide (the N in its ``slideN.xml`` name),
        not its position in the slide order.

    Raises:
        FileNotFoundError: If the template slide part does not exist.
    """
    template_part = f"ppt/slides/slide{template_slide_number}.xml"
    if template_part not in self.files:
        raise FileNotFoundError(f"Template slide not found: {template_part}")

    _, copy_part = self._next_slide_part()

    self._deep_copy_part(template_part, copy_part)
    self._add_content_type_override(copy_part, SLIDE_CONTENT_TYPE)

    copy_number = self._add_slide_to_presentation(slide_part=copy_part, before_section_name=before_section_name)

    self._has_changes = True
    return copy_number
410
+
411
def duplicate_template_slide(self, template_name: str, before_section_name: str = "template_slides") -> int:
    """Duplicate the slide marked with a ``TEMPLATE__<template_name>`` shape.

    The marker shape is removed from the copy so that later lookups only
    find the original template slide.

    Returns:
        The part number of the newly created slide.
    """
    marker = f"TEMPLATE__{template_name}"

    source_number = self.find_slide_by_shape_name(marker)
    copy_number = self.duplicate_slide(template_slide_number=source_number, before_section_name=before_section_name)

    # Strip the marker from the generated slide only; the template keeps it.
    self.remove_shape_on_slide(slide_number=copy_number, shape_name=marker)

    return copy_number
422
+
423
+ # Textbox editing
424
def _find_textbox_shape_in_slide(self, slide_root, textbox_name: str):
    """Return the first ``p:sp`` in *slide_root* named *textbox_name*, or ``None``.

    Names are compared ignoring case and surrounding whitespace.
    """
    wanted = textbox_name.strip().casefold()

    for candidate in slide_root.xpath(".//p:sp", namespaces=NS):
        props = candidate.xpath("./p:nvSpPr/p:cNvPr", namespaces=NS)
        if props and (props[0].get("name") or "").strip().casefold() == wanted:
            return candidate

    return None
437
+
438
def _append_text_run(self, paragraph, text: str, bold: bool = False) -> None:
    """Append an ``a:r`` run containing *text* to *paragraph*.

    Args:
        paragraph: The ``a:p`` element to extend.
        text: Run text.
        bold: When true, the run gets an ``a:rPr`` with ``b="1"``.
    """
    run_elem = etree.SubElement(paragraph, f"{{{A_NS}}}r")

    if bold:
        etree.SubElement(run_elem, f"{{{A_NS}}}rPr", {"b": "1"})

    text_elem = etree.SubElement(run_elem, f"{{{A_NS}}}t")

    # Mark leading/trailing spaces as significant.
    has_edge_space = text.startswith(" ") or text.endswith(" ")
    if has_edge_space:
        text_elem.set("{http://www.w3.org/XML/1998/namespace}space", "preserve")

    text_elem.text = text
450
+
451
def _replace_textbox_runs(self, txBody_elem, paragraphs: list[list[tuple[str, bool]]]) -> None:
    """Replace every paragraph of a text body with the given runs.

    All existing ``a:p`` children are removed and one new ``a:p`` is
    appended per entry in *paragraphs*. Existing paragraph and run
    formatting is not carried over.

    The text body always ends up with at least one paragraph: an empty
    *paragraphs* list produces a single paragraph with one empty run,
    because a text body without any ``a:p`` is not schema-valid.

    Args:
        txBody_elem: The ``p:txBody`` element to rewrite.
        paragraphs: One list of ``(text, bold)`` runs per paragraph. An
            empty run list yields a paragraph with a single empty run.
    """
    for stale in txBody_elem.xpath("./a:p", namespaces=NS):
        txBody_elem.remove(stale)

    for runs in paragraphs or [[]]:
        paragraph = etree.SubElement(txBody_elem, f"{{{A_NS}}}p")
        for text, bold in runs or [("", False)]:
            self._append_text_run(paragraph, text, bold)
463
+
464
+ def _text_to_paragraph_runs(self, text: str) -> list[list[tuple[str, bool]]]:
465
+ normalized = text.replace("\r\n", "\n").replace("\r", "\n")
466
+ return [[(line, False)] for line in normalized.split("\n")]
467
+
468
+ def _parse_textbox_markup(self, markup: str) -> list[list[tuple[str, bool]]]:
469
+ normalized = markup.replace("\r\n", "\n").replace("\r", "\n")
470
+ normalized = re.sub(r"(?i)<br\s*/?>", "\n", normalized)
471
+
472
+ paragraphs: list[list[tuple[str, bool]]] = []
473
+
474
+ for line in normalized.split("\n"):
475
+ runs: list[tuple[str, bool]] = []
476
+ last_index = 0
477
+
478
+ for match in re.finditer(r"(?i)<b>(.*?)</b>", line, flags=re.DOTALL):
479
+ if match.start() > last_index:
480
+ runs.append((line[last_index:match.start()], False))
481
+
482
+ runs.append((match.group(1), True))
483
+ last_index = match.end()
484
+
485
+ if last_index < len(line):
486
+ runs.append((line[last_index:], False))
487
+
488
+ if not runs:
489
+ runs.append(("", False))
490
+
491
+ paragraphs.append(runs)
492
+
493
+ return paragraphs
494
+
495
def find_textbox_anywhere(self, textbox_name: str) -> dict:
    """Find the first slide, by ascending part number, with the named textbox.

    Returns:
        A dict with keys ``"slide_number"`` and ``"slide_part"``.

    Raises:
        ValueError: If no slide contains a textbox with that name.
    """
    numbered_parts = sorted(
        ((int(hit.group(1)), part) for part in self.files if (hit := SLIDE_RE.match(part))),
        key=lambda pair: pair[0],
    )

    for slide_number, slide_part in numbered_parts:
        slide_root = etree.fromstring(self.files[slide_part])
        if self._find_textbox_shape_in_slide(slide_root, textbox_name) is None:
            continue
        return {
            "slide_number": slide_number,
            "slide_part": slide_part,
        }

    raise ValueError(f"Textbox named '{textbox_name}' not found anywhere in presentation")
510
+
511
def edit_textbox_on_slide(self, slide_number: int, textbox_name: str, new_text: str) -> None:
    """Replace a textbox's content with plain text.

    Each line of *new_text* becomes its own paragraph with one non-bold
    run. Slide lookup, shape lookup, rewriting and serialization are
    delegated to ``edit_textbox_runs_on_slide`` so the three textbox
    editors share a single implementation.

    Args:
        slide_number: Part number N of the ``slideN.xml`` to edit.
        textbox_name: Shape name, matched ignoring case and whitespace.
        new_text: Replacement text; line breaks separate paragraphs.

    Raises:
        FileNotFoundError: If the slide part does not exist.
        ValueError: If the textbox is missing or has no text body.
    """
    self.edit_textbox_runs_on_slide(
        slide_number,
        textbox_name,
        self._text_to_paragraph_runs(new_text),
    )
533
+
534
def edit_textbox_html_on_slide(self, slide_number: int, textbox_name: str, html: str) -> None:
    """Replace a textbox's content using the ``<b>`` / ``<br>`` markup dialect.

    *html* is parsed by ``_parse_textbox_markup``. Slide lookup, shape
    lookup, rewriting and serialization are delegated to
    ``edit_textbox_runs_on_slide`` so the three textbox editors share a
    single implementation.

    Args:
        slide_number: Part number N of the ``slideN.xml`` to edit.
        textbox_name: Shape name, matched ignoring case and whitespace.
        html: Markup string; only ``<b>...</b>`` and ``<br>`` are interpreted.

    Raises:
        FileNotFoundError: If the slide part does not exist.
        ValueError: If the textbox is missing or has no text body.
    """
    self.edit_textbox_runs_on_slide(
        slide_number,
        textbox_name,
        self._parse_textbox_markup(html),
    )
556
+
557
def edit_textbox_runs_on_slide(self, slide_number: int, textbox_name: str, paragraphs: list[list[tuple[str, bool]]]) -> None:
    """Replace a textbox's content with explicit paragraphs of runs.

    Args:
        slide_number: Part number N of the ``slideN.xml`` to edit.
        textbox_name: Shape name, matched ignoring case and whitespace.
        paragraphs: One list of ``(text, bold)`` runs per paragraph.

    Raises:
        FileNotFoundError: If the slide part does not exist.
        ValueError: If the textbox is missing or has no text body.
    """
    slide_part = f"ppt/slides/slide{slide_number}.xml"
    if slide_part not in self.files:
        raise FileNotFoundError(f"Slide not found: {slide_part}")

    slide_root = etree.fromstring(self.files[slide_part])

    target_shape = self._find_textbox_shape_in_slide(slide_root, textbox_name)
    if target_shape is None:
        raise ValueError(f"Textbox named '{textbox_name}' not found on slide {slide_number}")

    bodies = target_shape.xpath("./p:txBody", namespaces=NS)
    if not bodies:
        raise ValueError(f"No text body found in textbox '{textbox_name}'")

    self._replace_textbox_runs(bodies[0], paragraphs)

    self.files[slide_part] = etree.tostring(slide_root, xml_declaration=True, encoding="UTF-8", standalone="yes")
    self._has_changes = True
578
+
579
+ # Backward-compatible anywhere methods
580
def edit_textbox(self, textbox_name: str, new_text: str) -> None:
    """Set plain text on the first textbox with this name on any slide."""
    location = self.find_textbox_anywhere(textbox_name)
    slide_number = location["slide_number"]
    self.edit_textbox_on_slide(slide_number, textbox_name, new_text)
583
+
584
def edit_textbox_html(self, textbox_name: str, html: str) -> None:
    """Set marked-up text on the first textbox with this name on any slide."""
    location = self.find_textbox_anywhere(textbox_name)
    slide_number = location["slide_number"]
    self.edit_textbox_html_on_slide(slide_number, textbox_name, html)
587
+
588
+ # Chart lookup/editing
589
def find_chart_on_slide(self, slide_number: int, chart_name: str) -> dict:
    """Locate a named chart on one slide and resolve its chart part.

    Graphic frames are matched by name, ignoring case and surrounding
    whitespace. A frame qualifies when it references a ``c:chart`` or,
    failing that, a ``cx:chart`` whose relationship id resolves to a target
    in the slide's ``.rels`` part.

    Returns:
        A dict with keys ``"slide_number"``, ``"slide_part"``,
        ``"chart_part"``, ``"rel_id"`` and ``"chart_kind"`` (``"c"`` or
        ``"cx"``).

    Raises:
        FileNotFoundError: If the slide part or its ``.rels`` part is missing.
        ValueError: If no matching chart is found on the slide.
    """
    wanted = chart_name.strip().casefold()

    slide_part = f"ppt/slides/slide{slide_number}.xml"
    rels_part = f"ppt/slides/_rels/slide{slide_number}.xml.rels"

    if slide_part not in self.files:
        raise FileNotFoundError(f"Slide not found: {slide_part}")

    if rels_part not in self.files:
        raise FileNotFoundError(f"Slide relationships not found: {rels_part}")

    slide_root = etree.fromstring(self.files[slide_part])
    rels_root = etree.fromstring(self.files[rels_part])

    for frame in slide_root.xpath(".//p:graphicFrame", namespaces=NS):
        cNvPr = frame.xpath("./p:nvGraphicFramePr/p:cNvPr", namespaces=NS)
        if not cNvPr:
            continue

        actual_name = (cNvPr[0].get("name") or "").strip().casefold()
        if actual_name != wanted:
            continue

        chart_refs = frame.xpath(".//c:chart", namespaces=NS)
        chart_kind = "c"

        if not chart_refs:
            chart_refs = frame.xpath(".//cx:chart", namespaces=NS)
            chart_kind = "cx"

        if not chart_refs:
            continue

        rel_id = chart_refs[0].get(f"{{{R_NS}}}id")
        if not rel_id:
            continue

        # The id comes from the document, so pass it as an XPath variable
        # instead of splicing it into the expression; a quote character in
        # the id can then never break or alter the query.
        rel = rels_root.xpath(
            "./pr:Relationship[@Id=$rid]",
            namespaces=NS,
            rid=rel_id,
        )
        if not rel:
            continue

        target = rel[0].get("Target")
        if not target:
            continue

        return {
            "slide_number": slide_number,
            "slide_part": slide_part,
            "chart_part": self._normalize_relationship_target(slide_part, target),
            "rel_id": rel_id,
            "chart_kind": chart_kind,
        }

    raise ValueError(f"Chart named '{chart_name}' not found on slide {slide_number}")
644
+
645
def find_chart_anywhere(self, chart_name: str) -> dict:
    """Find the first slide, by ascending part number, with the named chart.

    Slides where the chart is not found are skipped. So are slides without
    a ``.rels`` part: ``find_chart_on_slide`` raises ``FileNotFoundError``
    for them, and such a slide cannot reference a chart, so it must not
    abort the search.

    Returns:
        The dict produced by ``find_chart_on_slide`` for the first match.

    Raises:
        ValueError: If no slide contains a chart with that name.
    """
    slide_numbers = sorted(
        int(match.group(1)) for name in self.files if (match := SLIDE_RE.match(name))
    )

    for slide_number in slide_numbers:
        try:
            return self.find_chart_on_slide(slide_number, chart_name)
        except (ValueError, FileNotFoundError):
            continue

    raise ValueError(f"Chart named '{chart_name}' not found anywhere in presentation")
656
+
657
def _replace_str_cache(self, str_cache, labels: list[str]) -> None:
    """Rewrite a ``c:strCache`` so it holds exactly *labels*.

    Existing ``c:pt`` points are removed, ``c:ptCount`` is updated (and
    created as the first child when absent), and one point per label is
    appended with consecutive ``idx`` values starting at 0.
    """
    for stale in str_cache.xpath("./c:pt", namespaces=NS):
        str_cache.remove(stale)

    count_elem = str_cache.find(f"{{{C_NS}}}ptCount")
    if count_elem is None:
        count_elem = etree.Element(f"{{{C_NS}}}ptCount")
        str_cache.insert(0, count_elem)
    count_elem.set("val", str(len(labels)))

    for position, label in enumerate(labels):
        point = etree.SubElement(str_cache, f"{{{C_NS}}}pt", idx=str(position))
        value_elem = etree.SubElement(point, f"{{{C_NS}}}v")
        value_elem.text = str(label)
671
+
672
def _replace_num_cache(self, num_cache, values: list[float]) -> None:
    """Rewrite a ``c:numCache`` so it holds exactly *values*.

    Existing ``c:pt`` points are removed, ``c:ptCount`` is updated, and one
    point per value is appended with consecutive ``idx`` values from 0.

    When ``c:ptCount`` is missing it is created directly after
    ``c:formatCode`` if that element is present, since a numeric cache
    lists ``formatCode`` before ``ptCount``. Otherwise it becomes the
    first child.
    """
    for pt in num_cache.xpath("./c:pt", namespaces=NS):
        num_cache.remove(pt)

    pt_count = num_cache.find(f"{{{C_NS}}}ptCount")
    if pt_count is None:
        pt_count = etree.Element(f"{{{C_NS}}}ptCount")
        format_code = num_cache.find(f"{{{C_NS}}}formatCode")
        # Keep element order: formatCode, ptCount, then the points.
        insert_at = 0 if format_code is None else num_cache.index(format_code) + 1
        num_cache.insert(insert_at, pt_count)

    pt_count.set("val", str(len(values)))

    for index, value in enumerate(values):
        pt = etree.SubElement(num_cache, f"{{{C_NS}}}pt", idx=str(index))
        etree.SubElement(pt, f"{{{C_NS}}}v").text = str(value)
686
+
687
def _get_embedded_workbook_target(self, chart_part: str, rels_root) -> str | None:
    """Return the part name of the chart's embedded package, if any.

    The first relationship of type ``REL_TYPE_PACKAGE`` in *rels_root* is
    resolved relative to *chart_part*. Returns ``None`` when there is none.
    """
    package_rel = next(
        (
            relationship
            for relationship in rels_root.xpath("./pr:Relationship", namespaces=NS)
            if relationship.get("Type") == REL_TYPE_PACKAGE
        ),
        None,
    )
    if package_rel is None:
        return None
    return self._normalize_relationship_target(chart_part, package_rel.get("Target", ""))
693
+
694
def _update_chartex_chart(self, chart_root, categories: list[str], values: list[float], subtotal_indices: list[int] | None = None) -> None:
    """Rewrite the cached data of a chartex (``cx``) chart in place.

    For the first category string dimension and the first value numeric
    dimension found, the ``cx:lvl`` points are replaced and ``ptCount`` is
    updated. The sibling ``cx:f`` formula is rewritten to cover rows 2
    through ``count + 1`` of column A (categories) or B (values).

    The sheet reference already present in the formula is kept. ``Sheet1``
    is used only when the existing formula has no ``!`` separator, so a
    chart bound to a differently named sheet stays bound to it.

    When *subtotal_indices* is given and the chart has a
    ``cx:layoutPr/cx:subtotals`` node, its ``cx:idx`` children are replaced
    with the given indices. Dimensions or nodes that are absent are
    silently skipped.
    """
    # (XPath of the level element, data to write, worksheet column)
    dimensions = (
        ("//cx:strDim[@type='cat']/cx:lvl", categories, "A"),
        ("//cx:numDim[@type='val']/cx:lvl", values, "B"),
    )

    for lvl_path, items, column in dimensions:
        found = chart_root.xpath(lvl_path, namespaces=NS)
        if not found:
            continue

        lvl = found[0]
        lvl.set("ptCount", str(len(items)))

        for pt in lvl.xpath("./cx:pt", namespaces=NS):
            lvl.remove(pt)

        for index, item in enumerate(items):
            pt = etree.SubElement(lvl, f"{{{CX_NS}}}pt", {"idx": str(index)})
            pt.text = str(item)

        f_elem = lvl.getparent().xpath("./cx:f", namespaces=NS)
        if f_elem:
            # Keep whatever sheet the formula already points at.
            sheet_ref, separator, _ = (f_elem[0].text or "").rpartition("!")
            if not separator or not sheet_ref:
                sheet_ref = "Sheet1"
            f_elem[0].text = f"{sheet_ref}!${column}$2:${column}${len(items) + 1}"

    if subtotal_indices is not None:
        subtotal_node = chart_root.xpath("//cx:layoutPr/cx:subtotals", namespaces=NS)
        if subtotal_node:
            subtotals = subtotal_node[0]

            for idx_elem in subtotals.xpath("./cx:idx", namespaces=NS):
                subtotals.remove(idx_elem)

            for idx in subtotal_indices:
                etree.SubElement(subtotals, f"{{{CX_NS}}}idx", {"val": str(idx)})
737
+
738
def _update_regular_chart_cache(self, chart_root, categories: list[str], values: list[float]) -> None:
    """Refresh the first category and value caches of a regular (``c``) chart.

    Only the first ``c:strCache`` under a ``c:cat`` and the first
    ``c:numCache`` under a ``c:val`` are rewritten. A missing cache is
    skipped without error.
    """
    category_caches = chart_root.xpath("//c:cat//c:strCache", namespaces=NS)
    if category_caches:
        self._replace_str_cache(category_caches[0], categories)

    value_caches = chart_root.xpath("//c:val//c:numCache", namespaces=NS)
    if value_caches:
        self._replace_num_cache(value_caches[0], values)
746
+
747
def edit_waterfall_data_on_slide(self, slide_number: int, chart_name: str, categories: list[str], values: list[float]) -> None:
    """Overwrite the cached categories and values of a chart on one slide.

    Only the chart part's first category string cache and first value
    numeric cache are rewritten; the embedded workbook is not touched.

    Raises:
        ValueError: If the two lists differ in length, the chart is not
            found, or either cache is missing from the chart part.
        FileNotFoundError: If the slide or its relationships part is missing.
    """
    if len(categories) != len(values):
        raise ValueError("categories and values must have the same length")

    chart_part = self.find_chart_on_slide(slide_number, chart_name)["chart_part"]
    chart_root = etree.fromstring(self.files[chart_part])

    category_caches = chart_root.xpath(".//c:cat//c:strCache", namespaces=NS)
    value_caches = chart_root.xpath(".//c:val//c:numCache", namespaces=NS)

    if not category_caches:
        raise ValueError("No category string cache found")
    if not value_caches:
        raise ValueError("No value numeric cache found")

    self._replace_str_cache(category_caches[0], categories)
    self._replace_num_cache(value_caches[0], values)

    self.files[chart_part] = etree.tostring(chart_root, xml_declaration=True, encoding="UTF-8", standalone="yes")
    self._has_changes = True
771
+
772
def edit_embedded_workbook_for_chart_on_slide(self, slide_number: int, chart_name: str, categories: list[str], values: list[float], sheet_name: str | None = None, subtotal_indices: list[int] | None = None) -> None:
    """Rewrite a chart's embedded workbook and its cached chart data.

    Columns A and B of the chosen sheet are cleared from row 2 downward and
    refilled with *categories* and *values*. The chart part is then updated
    through both the chartex and the regular cache updaters; each one skips
    whatever it does not find.

    Args:
        slide_number: Part number N of the ``slideN.xml`` holding the chart.
        chart_name: Graphic-frame name of the chart.
        categories: Values for column A.
        values: Values for column B; must match *categories* in length.
        sheet_name: Worksheet to edit; the workbook's active sheet if omitted.
        subtotal_indices: Passed through to the chartex updater.

    Raises:
        ValueError: On a length mismatch, a missing chart, or a chart with
            no embedded-workbook relationship.
        FileNotFoundError: If the slide, the chart's ``.rels`` part, or the
            embedded workbook is missing.
    """
    if len(categories) != len(values):
        raise ValueError("categories and values must have the same length")

    chart_part = self.find_chart_on_slide(slide_number, chart_name)["chart_part"]

    rels_part = f"{posixpath.dirname(chart_part)}/_rels/{posixpath.basename(chart_part)}.rels"
    if rels_part not in self.files:
        raise FileNotFoundError(f"Chart rels part not found: {rels_part}")

    workbook_target = self._get_embedded_workbook_target(
        chart_part,
        etree.fromstring(self.files[rels_part]),
    )
    if not workbook_target:
        raise ValueError("No embedded workbook relationship found for this chart")
    if workbook_target not in self.files:
        raise FileNotFoundError(f"Embedded workbook not found: {workbook_target}")

    workbook = load_workbook(BytesIO(self.files[workbook_target]))
    worksheet = workbook[sheet_name] if sheet_name else workbook.active

    # Row 1 is left alone; data starts on row 2.
    first_row = 2
    last_row = max(worksheet.max_row, len(categories) + 1)

    # Blank out the old data first so shorter series leave no stale rows.
    for row in range(first_row, last_row + 1):
        worksheet[f"A{row}"] = None
        worksheet[f"B{row}"] = None

    for offset, (category, value) in enumerate(zip(categories, values)):
        row = first_row + offset
        worksheet[f"A{row}"] = category
        worksheet[f"B{row}"] = value

    buffer = BytesIO()
    workbook.save(buffer)
    self.files[workbook_target] = buffer.getvalue()

    chart_root = etree.fromstring(self.files[chart_part])
    self._update_chartex_chart(chart_root=chart_root, categories=categories, values=values, subtotal_indices=subtotal_indices)
    self._update_regular_chart_cache(chart_root=chart_root, categories=categories, values=values)
    self.files[chart_part] = etree.tostring(chart_root, xml_declaration=True, encoding="UTF-8", standalone="yes")

    self._has_changes = True
821
+
822
+ # Backward-compatible anywhere methods
823
def edit_waterfall_data(self, chart_name: str, categories: list[str], values: list[float]) -> None:
    """Locate ``chart_name`` on any slide and delegate to ``edit_waterfall_data_on_slide``."""
    slide_number = self.find_chart_anywhere(chart_name)["slide_number"]
    self.edit_waterfall_data_on_slide(
        slide_number=slide_number,
        chart_name=chart_name,
        categories=categories,
        values=values,
    )
826
+
827
def edit_embedded_workbook_for_chart(self, chart_name: str, categories: list[str], values: list[float], sheet_name: str | None = None, subtotal_indices: list[int] | None = None) -> None:
    """Locate ``chart_name`` on any slide and delegate to the per-slide workbook editor.

    All arguments other than the resolved slide number are forwarded unchanged
    to ``edit_embedded_workbook_for_chart_on_slide``.
    """
    slide_number = self.find_chart_anywhere(chart_name)["slide_number"]
    self.edit_embedded_workbook_for_chart_on_slide(
        slide_number=slide_number,
        chart_name=chart_name,
        categories=categories,
        values=values,
        sheet_name=sheet_name,
        subtotal_indices=subtotal_indices,
    )
830
+
831
+ # PowerPoint COM refresh
832
def _refresh_powerpoint_chart(self, output_pptx: str, chart_name: str) -> None:
    """Open a saved deck in PowerPoint via COM, refresh matching charts, and save it.

    Every shape on every slide whose ``Name`` equals ``chart_name`` exactly and
    which reports ``HasChart`` has ``Chart.Refresh()`` called on it. This is
    best-effort: any failure is reported with a printed warning instead of
    being raised.

    Args:
        output_pptx: Path of the presentation file; made absolute before it is
            handed to PowerPoint.
        chart_name: Shape name of the chart(s) to refresh.
    """
    ppt_app = None
    presentation = None

    try:
        ppt_app = win32com.client.Dispatch("PowerPoint.Application")
        presentation = ppt_app.Presentations.Open(os.path.abspath(output_pptx))

        for slide in presentation.Slides:
            for shape in slide.Shapes:
                if shape.Name == chart_name and shape.HasChart:
                    shape.Chart.Refresh()

        presentation.Save()

    except Exception as exc:
        print(f"Warning: Could not refresh chart in PowerPoint: {exc}")

    finally:
        # Cleanup is deliberately silent: the outcome was already reported
        # above, and a COM error while closing must not mask it.
        try:
            if presentation is not None:
                presentation.Close()
        except Exception:
            pass

        try:
            # Dispatch may have attached to a PowerPoint instance the user
            # already had running. Quit only when nothing else is open, so
            # their other presentations are not closed with ours.
            if ppt_app is not None and ppt_app.Presentations.Count == 0:
                ppt_app.Quit()
        except Exception:
            pass
862
+
863
def refresh_chart(self, chart_name: str, output_pptx: str) -> None:
    """Refresh ``chart_name`` inside the saved file ``output_pptx`` using PowerPoint.

    Thin public wrapper over ``_refresh_powerpoint_chart``; note that the
    private helper takes the same two arguments in the opposite order.
    """
    self._refresh_powerpoint_chart(output_pptx=output_pptx, chart_name=chart_name)
865
+
866
+ # Debug/listing utilities
867
def list_all_textboxes(self) -> None:
    """Print the shape name of every ``p:sp`` that carries a text body, in slide order."""
    numbered_slides = []
    for part_name in self.files:
        hit = SLIDE_RE.match(part_name)
        if hit:
            numbered_slides.append((int(hit.group(1)), part_name))
    numbered_slides.sort(key=lambda pair: pair[0])

    for slide_number, slide_part in numbered_slides:
        root = etree.fromstring(self.files[slide_part])

        for sp in root.xpath(".//p:sp", namespaces=NS):
            props = sp.xpath("./p:nvSpPr/p:cNvPr", namespaces=NS)
            if not props:
                # No non-visual properties element, so there is no name to report.
                continue

            name = props[0].get("name") or ""
            if sp.xpath("./p:txBody", namespaces=NS):
                print(f"Slide {slide_number}: Textbox name={name!r}")
884
+
885
def list_graphic_frames(self) -> None:
    """Print each slide's graphic frames with their names and chart kind flags.

    Slides without any ``p:graphicFrame`` are skipped. For every frame the
    output says whether it contains a ``c:chart`` element (regular chart) or a
    ``cx:chart`` element (chartex chart).
    """
    numbered_slides = []
    for part_name in self.files:
        hit = SLIDE_RE.match(part_name)
        if hit:
            numbered_slides.append((int(hit.group(1)), part_name))
    numbered_slides.sort(key=lambda pair: pair[0])

    for slide_number, slide_part in numbered_slides:
        root = etree.fromstring(self.files[slide_part])
        frames = root.xpath(".//p:graphicFrame", namespaces=NS)
        if not frames:
            continue

        print(f"\nSlide {slide_number}")

        index = 0
        for frame in frames:
            index += 1
            props = frame.xpath("./p:nvGraphicFramePr/p:cNvPr", namespaces=NS)
            if props:
                shape_name = props[0].get("name")
            else:
                shape_name = "<no name>"

            chart_refs = frame.xpath(".//c:chart", namespaces=NS)
            chartex_refs = frame.xpath(".//cx:chart", namespaces=NS)

            print(
                f" {index}. name={shape_name!r}, is_regular_chart={bool(chart_refs)}, is_chartex_chart={bool(chartex_refs)}"
            )
908
+
909
def list_sections(self) -> None:
    """Print every ``p14:section`` in ``ppt/presentation.xml`` with its slide count."""
    root = etree.fromstring(self.files["ppt/presentation.xml"])
    sections = root.xpath(".//p14:section", namespaces=NS)

    if sections:
        for index, section in enumerate(sections, start=1):
            name = section.get("name") or ""
            slide_ids = section.xpath("./p14:sldIdLst/p14:sldId", namespaces=NS)
            print(f"{index}. {name!r}: {len(slide_ids)} slides")
    else:
        print("No sections found")
921
+
922
def dump_chartex_debug(self, chart_name: str) -> None:
    """Print the lookup result, chart XML and chart rels XML for ``chart_name``.

    Each XML dump is decoded as UTF-8 (undecodable bytes dropped) and cut off
    after 8000 characters.
    """
    found = self.find_chart_anywhere(chart_name)
    chart_part = found["chart_part"]
    chart_dir, chart_filename = posixpath.split(chart_part)
    rels_part = f"{chart_dir}/_rels/{chart_filename}.rels"

    print("FOUND:", found)
    print()

    print("=== CHART PART ===")
    print(chart_part)
    chart_text = self.files[chart_part].decode("utf-8", errors="ignore")
    print(chart_text[:8000])
    print()

    if rels_part not in self.files:
        print("No chart rels part found:", rels_part)
    else:
        print("=== CHART RELS ===")
        print(rels_part)
        rels_text = self.files[rels_part].decode("utf-8", errors="ignore")
        print(rels_text[:8000])
943
+
944
+ # ============================================================
945
+ # Table lookup and editing
946
+ # ============================================================
947
def find_table_on_slide(self, slide_number: int, table_name: str) -> dict:
    """Find a table by shape name on one slide.

    A match is a ``p:graphicFrame`` whose name equals ``table_name`` (compared
    after stripping whitespace and case-folding) and which contains an
    ``a:tbl`` element.

    Returns:
        ``{'slide_number': ..., 'slide_part': ...}`` for the first match.

    Raises:
        FileNotFoundError: If the slide part is not in the package.
        ValueError: If no matching table exists on the slide.
    """
    wanted = table_name.strip().casefold()

    slide_part = f"ppt/slides/slide{slide_number}.xml"
    if slide_part not in self.files:
        raise FileNotFoundError(f"Slide not found: {slide_part}")

    root = etree.fromstring(self.files[slide_part])

    for frame in root.xpath('.//p:graphicFrame', namespaces=NS):
        props = frame.xpath('./p:nvGraphicFramePr/p:cNvPr', namespaces=NS)
        if not props:
            continue

        frame_name = (props[0].get('name') or '').strip().casefold()
        # A same-named frame that holds something other than a table is not a hit.
        if frame_name == wanted and frame.xpath('.//a:tbl', namespaces=NS):
            return {
                'slide_number': slide_number,
                'slide_part': slide_part,
            }

    raise ValueError(f"Table named '{table_name}' not found on slide {slide_number}")
980
+
981
def find_table_anywhere(self, table_name: str) -> dict:
    """Search slides in ascending slide-number order and return the first table match.

    Returns whatever ``find_table_on_slide`` returns for the first slide that
    contains the table; raises ``ValueError`` when no slide does.
    """
    slide_numbers = sorted(
        int(hit.group(1))
        for hit in map(SLIDE_RE.match, self.files)
        if hit
    )

    for slide_number in slide_numbers:
        try:
            return self.find_table_on_slide(slide_number, table_name)
        except ValueError:
            # Not on this slide; keep looking.
            pass

    raise ValueError(f"Table named '{table_name}' not found anywhere in presentation")
992
+
993
def _set_table_cell_text(self, txBody_elem, text: str) -> None:
    """Replace a table cell's text body with ``text``.

    The text is converted with ``_text_to_paragraph_runs`` and the result is
    written into ``txBody_elem`` by ``_replace_textbox_runs``.
    """
    self._replace_textbox_runs(txBody_elem, self._text_to_paragraph_runs(text))
997
+
998
def edit_table_cell_on_slide(self, slide_number: int, table_name: str, row: int, col: int, new_text: str) -> None:
    """Set the text of one table cell, addressed by 0-based ``row`` and ``col``.

    Raises:
        IndexError: If ``row`` or ``col`` lies outside the table.
        ValueError: If the table is not found or the cell has no text body.
    """
    slide_part = self.find_table_on_slide(slide_number, table_name)['slide_part']
    slide_root = etree.fromstring(self.files[slide_part])
    wanted = table_name.strip().casefold()

    # The lookup above only returns the part name, so locate the frame again
    # in this freshly parsed tree.
    for frame in slide_root.xpath('.//p:graphicFrame', namespaces=NS):
        props = frame.xpath('./p:nvGraphicFramePr/p:cNvPr', namespaces=NS)
        if not props:
            continue
        if (props[0].get('name') or '').strip().casefold() != wanted:
            continue

        tables = frame.xpath('.//a:tbl', namespaces=NS)
        if not tables:
            continue

        table_rows = tables[0].xpath('./a:tr', namespaces=NS)
        if row < 0 or row >= len(table_rows):
            raise IndexError('row index out of range')

        row_cells = table_rows[row].xpath('./a:tc', namespaces=NS)
        if col < 0 or col >= len(row_cells):
            raise IndexError('col index out of range')

        bodies = row_cells[col].xpath('.//a:txBody', namespaces=NS)
        if not bodies:
            raise ValueError('Table cell has no text body')

        self._set_table_cell_text(bodies[0], new_text)

        # Write the modified slide back into the package and stop at the first match.
        self.files[slide_part] = etree.tostring(slide_root, xml_declaration=True, encoding='UTF-8', standalone='yes')
        self._has_changes = True
        return

    raise ValueError(f"Table named '{table_name}' not found on slide {slide_number}")
1041
+
1042
def edit_table_range_on_slide(self, slide_number: int, table_name: str, data: list[list[str]]) -> None:
    """Fill a table from its top-left cell with a 2D list of strings.

    ``data`` may be smaller than the table; cells beyond it are left as they
    are. The slide is written back only after every cell has been set, so an
    error part-way through leaves the stored slide unchanged.

    Raises:
        ValueError: If ``data`` has more rows or columns than the table, a
            cell has no text body, or the table is not found.
    """
    slide_part = self.find_table_on_slide(slide_number, table_name)['slide_part']
    slide_root = etree.fromstring(self.files[slide_part])
    wanted = table_name.strip().casefold()

    for frame in slide_root.xpath('.//p:graphicFrame', namespaces=NS):
        props = frame.xpath('./p:nvGraphicFramePr/p:cNvPr', namespaces=NS)
        if not props:
            continue
        if (props[0].get('name') or '').strip().casefold() != wanted:
            continue

        tables = frame.xpath('.//a:tbl', namespaces=NS)
        if not tables:
            continue

        table_rows = tables[0].xpath('./a:tr', namespaces=NS)
        if len(data) > len(table_rows):
            raise ValueError('Provided data has more rows than the table')

        # zip stops at the shorter side; the length checks guarantee that is
        # always the supplied data.
        for r_idx, (row_vals, tr) in enumerate(zip(data, table_rows)):
            row_cells = tr.xpath('./a:tc', namespaces=NS)
            if len(row_vals) > len(row_cells):
                raise ValueError(f'Row {r_idx} has more columns than the table')

            for val, cell in zip(row_vals, row_cells):
                bodies = cell.xpath('.//a:txBody', namespaces=NS)
                if not bodies:
                    raise ValueError('Table cell has no text body')
                self._set_table_cell_text(bodies[0], val)

        # Write the modified slide back into the package and stop at the first match.
        self.files[slide_part] = etree.tostring(slide_root, xml_declaration=True, encoding='UTF-8', standalone='yes')
        self._has_changes = True
        return

    raise ValueError(f"Table named '{table_name}' not found on slide {slide_number}")
@@ -0,0 +1,102 @@
1
+ Metadata-Version: 2.4
2
+ Name: xmlppt
3
+ Version: 0.1.0
4
+ Summary: PowerPoint editing via Open XML package manipulation
5
+ Author: Your Name
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/jlondon626/powerpoint_editor
8
+ Project-URL: Repository, https://github.com/jlondon626/powerpoint_editor
9
+ Keywords: pptx,powerpoint,openxml,slides,excel
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Operating System :: Microsoft :: Windows
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: lxml>=4.0.0
18
+ Requires-Dist: openpyxl>=3.0.0
19
+ Requires-Dist: pywin32>=306; sys_platform == "win32"
20
+
21
+ # xmlppt
22
+
23
+ A small utility package for editing PowerPoint (.pptx) files by
24
+ manipulating the Open XML package directly. It supports duplicating
25
+ template slides, editing named textboxes, updating embedded Excel
26
+ workbooks for charts, and basic listing/debug utilities.
27
+
28
+ This repository contains a single package `xmlppt` with the main
29
+ implementation in `xmlppt/editor.py` and a minimal `main.py` example
30
+ entrypoint.
31
+
32
+ Requirements
33
+ - Python 3.10+
34
+ - lxml
35
+ - openpyxl
36
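+ - pywin32 (Windows only; used by `refresh_chart()`, but `xmlppt/editor.py` imports `win32com.client` at module level, so importing `xmlppt` currently requires it)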
37
+
38
+ Install
39
+
40
+ Create a virtual environment and install dependencies:
41
+
42
+ ```powershell
43
+ python -m venv .venv
44
+ .venv\Scripts\Activate.ps1 # Windows PowerShell
45
+ pip install -r requirements.txt
46
+ ```
47
+
48
+ Install the package locally for development:
49
+
50
+ ```bash
51
+ pip install -e .
52
+ ```
53
+
54
+ Quick usage (programmatic)
55
+
56
+ ```python
57
+ from xmlppt import PowerPointEditor
58
+
59
+ editor = PowerPointEditor('input.pptx')
60
+ editor.duplicate_template_slide('RESERVE_WATERFALL')
61
+ editor.edit_textbox_html('AOM Text', '<b>Updated</b>')
62
+ editor.save('output.pptx')
63
+ ```
64
+
65
+ CLI example
66
+
67
+ Run the example entrypoint which shows basic diagnostics and attempts
68
+ to run a sample flow (expects a marker template slide):
69
+
70
+ ```bash
71
+ python main.py --input example.pptx --run-example
72
+ ```
73
+
74
+ Run tests
75
+
76
+ ```bash
77
+ pytest -q
78
+ ```
79
+
80
+ CI
81
+
82
+ A GitHub Actions workflow is included at `.github/workflows/python-package.yml`
83
+ that installs the package and runs the test suite on Windows.
84
+
85
+ Publishing
86
+
87
+ A release workflow is included at `.github/workflows/publish.yml`.
88
+ When you create a GitHub release and publish it, the workflow will build
89
+ and upload the package to PyPI using the `PYPI_API_TOKEN` secret.
90
+
91
+ To configure publishing:
92
+
93
+ 1. Create a PyPI API token at https://pypi.org/manage/account/token/
94
+ 2. Add it to your repository secrets as `PYPI_API_TOKEN`
95
+ 3. Create a release on GitHub
96
+
97
+ Notes
98
+ - The `refresh_chart()` function uses COM automation to refresh chart
99
+ visuals inside PowerPoint; this only works on Windows with PowerPoint
100
+ installed.
101
+ - The package manipulates the raw PPTX (zip) contents; always test on
102
+ copies of presentations before running on production files.
@@ -0,0 +1,6 @@
1
+ xmlppt/__init__.py,sha256=sm-J_Vjv6jLSsp8UbUT4eWa8SFl6-TbNjWo27cQC5pQ,72
2
+ xmlppt/editor.py,sha256=Ddm580DSEbxH4Xfo4u1HKb3lR7C5HhGlYArMadwtBtc,44901
3
+ xmlppt-0.1.0.dist-info/METADATA,sha256=q2bxG7NdkPtSm9dyybHOpJRomdEBeTnNPn-ybKVrLgw,2987
4
+ xmlppt-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ xmlppt-0.1.0.dist-info/top_level.txt,sha256=-0Xv__j59Mliw5TUB9CkEgym1zdsJtbVcNNmnXCLnnI,7
6
+ xmlppt-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ xmlppt