valediction-1.0.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (38)
  1. valediction/__init__.py +8 -0
  2. valediction/convenience.py +50 -0
  3. valediction/data_types/__init__.py +0 -0
  4. valediction/data_types/data_type_helpers.py +75 -0
  5. valediction/data_types/data_types.py +58 -0
  6. valediction/data_types/type_inference.py +541 -0
  7. valediction/datasets/__init__.py +0 -0
  8. valediction/datasets/datasets.py +870 -0
  9. valediction/datasets/datasets_helpers.py +46 -0
  10. valediction/demo/DEMO - Data Dictionary.xlsx +0 -0
  11. valediction/demo/DEMOGRAPHICS.csv +101 -0
  12. valediction/demo/DIAGNOSES.csv +650 -0
  13. valediction/demo/LAB_TESTS.csv +1001 -0
  14. valediction/demo/VITALS.csv +1001 -0
  15. valediction/demo/__init__.py +6 -0
  16. valediction/demo/demo_dictionary.py +129 -0
  17. valediction/dictionary/__init__.py +0 -0
  18. valediction/dictionary/exporting.py +501 -0
  19. valediction/dictionary/exporting_helpers.py +371 -0
  20. valediction/dictionary/generation.py +357 -0
  21. valediction/dictionary/helpers.py +174 -0
  22. valediction/dictionary/importing.py +494 -0
  23. valediction/dictionary/integrity.py +37 -0
  24. valediction/dictionary/model.py +582 -0
  25. valediction/dictionary/template/PROJECT - Data Dictionary.xltx +0 -0
  26. valediction/exceptions.py +22 -0
  27. valediction/integrity.py +97 -0
  28. valediction/io/__init__.py +0 -0
  29. valediction/io/csv_readers.py +307 -0
  30. valediction/progress.py +206 -0
  31. valediction/support.py +72 -0
  32. valediction/validation/__init__.py +0 -0
  33. valediction/validation/helpers.py +315 -0
  34. valediction/validation/issues.py +280 -0
  35. valediction/validation/validation.py +598 -0
  36. valediction-1.0.0.dist-info/METADATA +15 -0
  37. valediction-1.0.0.dist-info/RECORD +38 -0
  38. valediction-1.0.0.dist-info/WHEEL +4 -0
valediction/demo/__init__.py
@@ -0,0 +1,6 @@
+ from pathlib import Path
+
+ from valediction.demo.demo_dictionary import demo_dictionary  # noqa
+
+ DEMO_DATA = Path(__file__).resolve().parent
+ DEMO_DICTIONARY = DEMO_DATA / "DEMO - Data Dictionary.xlsx"
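
For orientation, a minimal sketch of how the demo paths defined above might be used; it assumes only the standard-library csv module and is not part of the package itself:

    from csv import DictReader

    from valediction.demo import DEMO_DATA, DEMO_DICTIONARY

    # DEMO_DATA points at the installed valediction/demo/ directory,
    # which also contains the bundled CSV extracts listed above.
    with open(DEMO_DATA / "DEMOGRAPHICS.csv", newline="") as handle:
        first_row = next(DictReader(handle))

    print(DEMO_DICTIONARY.name)  # "DEMO - Data Dictionary.xlsx"
    print(list(first_row))       # column headers of the demo extract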
valediction/demo/demo_dictionary.py
@@ -0,0 +1,129 @@
+ from valediction.dictionary.model import Column, Dictionary, Table
+
+
+ def demo_dictionary() -> Dictionary:
+     """Get a Dictionary object equivalent to the demo dataset.
+
+     Returns:
+         Dictionary: Demo Dictionary
+     """
+     demographics = Table(
+         name="demographics",
+         columns=[
+             Column(
+                 name="PATIENT_HASH", order=1, data_type="text", length=12, primary_key=1
+             ),
+             Column(name="DATE_OF_BIRTH", order=2, data_type="date"),
+             Column(name="ETHNICITY", order=3, data_type="text", length=1),
+             Column(name="SEX", order=4, data_type="text", length=6),
+             Column(name="DATE_OF_DEATH", order=5, data_type="date"),
+         ],
+         description="Demographic information for synthetic patients",
+     )
+
+     diagnoses = Table(
+         name="diagnoses",
+         columns=[
+             Column(
+                 name="PATIENT_HASH",
+                 order=1,
+                 data_type="text",
+                 length=12,
+                 primary_key=1,
+                 foreign_key="DEMOGRAPHICS.PATIENT_HASH",
+             ),
+             Column(name="DATE_OF_RECORDING", order=2, data_type="date", primary_key=2),
+             Column(
+                 name="DIAGNOSIS_CODE",
+                 order=3,
+                 data_type="text",
+                 length=6,
+                 primary_key=3,
+             ),
+             Column(
+                 name="PRIMARY_DIAGNOSIS",
+                 order=4,
+                 data_type="text",
+                 length=1,
+                 enumerations={"Y": "Primary Diagnosis", "N": "Comorbidity"},
+             ),
+         ],
+         description="ICD diagnoses for synthetic patients",
+     )
+
+     lab_tests = Table(
+         name="lab_tests",
+         columns=[
+             Column(
+                 name="PATIENT_HASH",
+                 order=1,
+                 data_type="text",
+                 length=12,
+                 primary_key=1,
+                 foreign_key="DEMOGRAPHICS.PATIENT_HASH",
+             ),
+             Column(name="SAMPLE_DATE", order=2, data_type="date", primary_key=2),
+             Column(name="RESULT_DATE", order=3, data_type="date", primary_key=3),
+             Column(name="SAMPLE_TYPE", order=4, data_type="text", length=32),
+             Column(
+                 name="TEST_TYPE", order=5, data_type="text", length=32, primary_key=4
+             ),
+             Column(name="RESULT_RAW", order=6, data_type="text", length=256),
+             Column(name="UNITS", order=7, data_type="text", length=16),
+             Column(name="RESULT_NUMERIC", order=8, data_type="float"),
+             Column(
+                 name="RESULT_PROCESSED",
+                 order=9,
+                 data_type="text",
+                 length=8,
+                 enumerations={"positive": "Positive", "negative": "Negative"},
+             ),
+             Column(
+                 name="OPERATOR",
+                 order=10,
+                 data_type="text",
+                 length=1,
+                 enumerations={"-": "Less Than", "+": "Greater Than"},
+             ),
+             Column(name="RANGE_LOW", order=11, data_type="float"),
+             Column(name="RANGE_HIGH", order=12, data_type="float"),
+         ],
+         description="Lab test results for synthetic patients",
+     )
+
+     vitals = Table(
+         name="vitals",
+         columns=[
+             Column(
+                 name="PATIENT_HASH",
+                 order=1,
+                 data_type="text",
+                 length=12,
+                 primary_key=1,
+                 foreign_key="DEMOGRAPHICS.PATIENT_HASH",
+             ),
+             Column(
+                 name="OBSERVATION_TIME", order=2, data_type="datetime", primary_key=2
+             ),
+             Column(
+                 name="OBSERVATION_TYPE",
+                 order=3,
+                 data_type="text",
+                 length=32,
+                 primary_key=3,
+             ),
+             Column(name="RESULT", order=4, data_type="float"),
+         ],
+         description="Numeric vital sign obs for synthetic patients",
+     )
+
+     demo_dictionary = Dictionary(
+         name="Synthetic Data",
+         version="v0.1",
+         version_notes="Synthetic dataset for allowing Wessex SDE onboarding",
+         inclusion_criteria="* synthetic patients",
+         exclusion_criteria="* real patients",
+         tables=[demographics, diagnoses, lab_tests, vitals],
+     )
+
+     return demo_dictionary
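
A hedged sketch of how demo_dictionary() might be combined with export_dictionary() from valediction/dictionary/exporting.py (shown later in this diff); the output directory name is illustrative:

    from valediction.demo import demo_dictionary
    from valediction.dictionary.exporting import export_dictionary

    dictionary = demo_dictionary()

    # Writes "DEMO - Data Dictionary.xlsx" into ./out using the bundled template
    export_dictionary(
        dictionary,
        directory="out",
        filename="DEMO - Data Dictionary",
        overwrite=True,
        debug=True,
    )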
valediction/dictionary/exporting.py
@@ -0,0 +1,501 @@
+ from pathlib import Path
+ from typing import Any, Dict, Iterable, List, Mapping, Optional
+
+ from openpyxl import Workbook, load_workbook
+ from openpyxl.utils import get_column_letter, range_boundaries
+ from openpyxl.worksheet.table import Table as ExcelTable
+ from openpyxl.worksheet.worksheet import Worksheet
+
+ from valediction.dictionary.exporting_helpers import (
+     CalculatedColInfo,
+     CFRuleInfo,
+     DVRuleInfo,
+     _build_row_list_from_mapping,
+     _collect_conditional_formats,
+     _collect_data_validations,
+     _collect_table_formulas,
+     _extend_cf_for_new_row,
+     _extend_dv_for_new_row,
+     _find_label_cell,
+     _first_blank_data_row,
+     _table_column_index_map,
+ )
+ from valediction.dictionary.integrity import (
+     DD_COLUMN_MAP,
+     DD_TABLE_MAP,
+ )
+ from valediction.dictionary.model import Dictionary
+ from valediction.exceptions import (
+     DataDictionaryError,
+     DataDictionaryExportError,
+     DataDictionaryImportError,
+ )
+ from valediction.integrity import get_config
+
+
+ class Exporter:
+     def __init__(
+         self,
+         dictionary: Dictionary | None = None,
+         debug: bool = False,
+         template_path: str | Path | None = None,
+     ):
+         # Settings
+         self.debug: bool = debug
+         self.debug_report: list[str] = []
+         self.template_path: Path = (
+             Path(template_path)
+             if template_path is not None
+             else Path(get_config().template_data_dictionary_path)
+         )
+
+         # Workbook
+         self.workbook: Workbook | None = None
+
+         # Dictionary
+         self.dictionary: Dictionary | None = None
+
+         # Lookups
+         self.sheets: dict[str, Worksheet] = {}
+         self.tables: dict[str, ExcelTable] = {}
+         self.table_column_maps: dict[str, dict[str, int]] = {}
+
+         # Formatting
+         self.conditional_formats: dict[str, list[CFRuleInfo]] = {}
+         self.data_validations: dict[str, list[DVRuleInfo]] = {}
+         self.calculated_columns: dict[str, list[CalculatedColInfo]] = {}
+
+         # Setup
+         if dictionary:
+             self.load_dictionary(dictionary)
+         else:
+             self.__say(
+                 "Exporter instantiated without dictionary - remember to load_dictionary()"
+             )
+         self.load_template()
+
+     # Magic #
+     def __repr__(self):
+         return f"Exporter(debug={self.debug})"
+
+     def __say(
+         self,
+         *values: object,
+         sep: str | None = " ",
+         end: str | None = "\n",
+     ) -> None:
+         msg = sep.join(map(str, values)) + (end or "")
+         self.debug_report.append(msg)
+         if self.debug:
+             print("Exporter:", *values, sep=sep, end=end)
+
+     # Main Functions #
+
+     # High Helpers #
+     def load_dictionary(self, dictionary: Dictionary):
+         if not isinstance(dictionary, Dictionary):
+             raise DataDictionaryError("Dictionary must be a Dictionary object")
+         self.dictionary = dictionary
+
+     def load_template(self) -> None:
+         supported = {".xltx", ".xlsx"}
+         if not self.template_path.exists():
+             raise DataDictionaryImportError(f"Template not found: {self.template_path}")
+
+         if self.template_path.suffix.lower() not in supported:
+             raise DataDictionaryImportError(
+                 f"Unexpected template extension: {self.template_path.suffix}; supported: {supported}"
+             )
+
+         self.workbook = load_workbook(
+             self.template_path, data_only=False, keep_vba=False
+         )
+
+         self.workbook.template = False
+         self.__say(f"Loaded workbook from {self.template_path}")
+         self.load_sheets_and_tables()
+         self._validate_required_columns()
+         self.inspect_template_features()
+
+     def load_sheets_and_tables(self) -> None:
+         if self.workbook is None:
+             raise DataDictionaryImportError("Workbook not loaded.")
+
+         for sheet_key, table_name in DD_TABLE_MAP.items():
+             worksheet = self._get_worksheet(sheet_key)
+             self.sheets[sheet_key.lower()] = worksheet
+             attr_name = f"{sheet_key.lower()}_ws"
+             setattr(self, attr_name, worksheet)
+
+             if table_name:
+                 table = self._get_table(sheet_key, table_name)
+                 self.tables[table_name.lower()] = table
+
+         self.__say("Required sheets and tables found and loaded.")
+
+     def inspect_template_features(self) -> None:
+         self.__say(
+             "Inspecting Conditional Formatting [CF], Data Validations [DV], Calculated Columns [CC]:"
+         )
+         # 1) Conditional formatting per sheet
+         for key, worksheet in self.sheets.items():
+             rules = list(_collect_conditional_formats(worksheet))
+             self.conditional_formats[key] = rules
+             self.__say(f"[CF] {worksheet.title}: {len(rules)} rules")
+
+         # 2) Data validation per sheet
+         for key, worksheet in self.sheets.items():
+             dv_rules = list(_collect_data_validations(worksheet))
+             self.data_validations[key] = dv_rules
+             self.__say(f"[DV] {worksheet.title}: {len(dv_rules)} rules")
+
+         # 3) Calculated columns per table
+         for tkey, table in self.tables.items():
+             worksheet = self._worksheet_of_table(table)
+             cols = _collect_table_formulas(table)
+             self.calculated_columns[tkey] = cols
+             self.__say(
+                 f"[CC] {self._table_name(table)}: {len(cols)} calculated columns"
+             )
+
+         self.__say("Inspection complete")
+
+     def add_row(
+         self, table: str, data: Optional[List] = None, quiet: bool = False
+     ) -> int:
+         data = [] if data is None else list(data)
+         tkey = table.lower()
+         tbl = self.tables.get(tkey)
+         if tbl is None:
+             raise DataDictionaryImportError(f"Table '{table}' not loaded.")
+
+         ws = self._worksheet_of_table(tbl)
+         min_c, min_r, max_c, max_r = range_boundaries(tbl.ref)
+         ncols = max_c - min_c + 1
+
+         # Column map (lower header -> 0-based index within table)
+         colmap = self.table_column_maps.get(tkey)
+         if not colmap:
+             colmap = _table_column_index_map(tbl)
+             self.table_column_maps[tkey] = colmap
+
+         # Calculated/formula columns from metadata (lower header -> '=...')
+         formulas: Dict[str, str] = _collect_table_formulas(tbl)
+
+         # Data length sanity
+         if len(data) > ncols:
+             raise DataDictionaryError(
+                 f"Too many values for '{self._table_name(tbl)}': {len(data)} provided, {ncols} columns."
+             )
+         if len(data) < ncols:
+             data += [None] * (ncols - len(data))
+
+         # Decide target row: reuse first blank data row, else append
+         reuse_row = _first_blank_data_row(ws, tbl, colmap, formulas)
+         append = reuse_row is None
+         target_row = (max_r + 1) if append else reuse_row
+
+         # Write row; ignore user values for formula columns
+         idx_to_header = {v: k for k, v in colmap.items()}
+         for j_idx in range(1, ncols + 1):
+             abs_col = min_c + j_idx - 1
+             cell = ws.cell(target_row, abs_col)
+             header_lower = idx_to_header.get(j_idx, "")
+             if header_lower in formulas and isinstance(formulas[header_lower], str):
+                 cell.value = formulas[header_lower]
+             else:
+                 cell.value = data[j_idx - 1]
+
+         # Only extend table + CF/DV if we actually appended
+         if append:
+             old_bottom = max_r
+             new_ref = f"{get_column_letter(min_c)}{min_r}:{get_column_letter(max_c)}{target_row}"
+             tbl.ref = new_ref
+             if getattr(tbl, "autoFilter", None) is not None:
+                 tbl.autoFilter.ref = new_ref
+
+             _extend_cf_for_new_row(
+                 ws, (min_c, max_c), old_bottom=old_bottom, new_row=target_row
+             )
+             _extend_dv_for_new_row(ws, (min_c, max_c), new_row=target_row)
+
+         if not quiet:
+             self.__say(
+                 f"{'Appended' if append else 'Reused'} row {target_row} in '{self._table_name(tbl)}' (width={ncols})."
+             )
+         return target_row
+
+     def export(
+         self,
+         directory: Path | str,
+         filename: str | None = None,
+         overwrite: bool = False,
+     ) -> Path:
+         directory = Path(directory)
+         filename = f"{filename}.xlsx" if filename else "PROJECT - Data Dictionary.xlsx"
+         out_path = directory / filename
+
+         # Prepare destination
+         out_path.parent.mkdir(parents=True, exist_ok=True)
+
+         # Overwrite check
+         if out_path.exists() and not overwrite:
+             raise DataDictionaryExportError(
+                 f"File exists and overwrite=False: {out_path}"
+             )
+
+         # Save
+         try:
+             self.workbook.save(out_path)
+         except PermissionError as e:
+             raise DataDictionaryExportError(
+                 f"Cannot write '{out_path}'. Is the file open? ({e})"
+             ) from e
+
+         self.__say(f"Exported workbook → {out_path}")
+         # optional: remember last export location
+         self.last_export_path = out_path
+         self.workbook.close()
+         return out_path
+
+     def add_rows(
+         self, table: str, rows: Iterable[Mapping[str, Any] | List[Any]]
+     ) -> List[int]:
+         tkey = table.lower()
+         tbl = self.tables.get(tkey)
+         if tbl is None:
+             raise DataDictionaryImportError(f"Table '{table}' not loaded.")
+
+         colmap = self.table_column_maps.get(tkey) or _table_column_index_map(tbl)
+         self.table_column_maps[tkey] = colmap
+
+         formulas: Dict[str, str] = _collect_table_formulas(tbl)
+
+         written_rows: List[int] = []
+         for row in rows:
+             if isinstance(row, Mapping):
+                 data_list = _build_row_list_from_mapping(tbl, colmap, formulas, row)
+             else:
+                 data_list = list(row)
+             written_rows.append(self.add_row(table, data_list, quiet=True))
+
+         self.__say(f"Wrote {len(written_rows)} row(s) to '{self._table_name(tbl)}'")
+         return written_rows
+
+     def write_details(self) -> dict[str, str]:
+         if self.dictionary is None:
+             raise DataDictionaryError(
+                 "No Dictionary loaded. Call load_dictionary() first."
+             )
+
+         ws = self.sheets.get("details") or self._get_worksheet("details")
+
+         plan: list[tuple[str, object]] = [
+             ("Name", self.dictionary.name),
+             ("Organisation(s)", self.dictionary.organisations),
+             ("Version", self.dictionary.version),
+             ("Version Notes", self.dictionary.version_notes),
+             ("Inclusion Criteria", self.dictionary.inclusion_criteria),
+             ("Exclusion Criteria", self.dictionary.exclusion_criteria),
+         ]
+
+         written: dict[str, str] = {}
+         for label, value in plan:
+             if value is None:
+                 continue  # leave whatever the template has
+             r, c = _find_label_cell(ws, label)
+             dest_col = c + 1  # write in the next cell to the right
+             ws.cell(r, dest_col).value = value
+             addr = f"{get_column_letter(dest_col)}{r}"
+             written[label] = addr
+             self.__say(f"[Details] {label} → {addr}")
+
+         return written
+
+     def write_tables(self) -> List[int]:
+         rows = []
+         for table in self.dictionary:
+             rows.append(
+                 {
+                     "table": table.name,
+                     "description": table.description,
+                 }
+             )
+         self.__say(f"Writing {len(rows)} row(s) to Tables")
+         return self.add_rows("tables", rows)
+
+     def write_columns(self) -> List[int]:
+         rows = []
+         for table in self.dictionary:
+             for column in table:
+                 rows.append(
+                     {
+                         "table": table.name,
+                         "column": column.name,
+                         "order": column.order,
+                         "data type": str(
+                             column.data_type
+                         ),  # DataType -> 'Text', 'Integer', …
+                         "length": column.length
+                         if column.data_type.allows_length()
+                         else None,
+                         "vocabularies": column.vocabulary,
+                         "enumerations": "Y" if column.enumerations else None,
+                         "primary key": column.primary_key,
+                         "foreign key target": column.foreign_key,
+                         "column description": column.description,
+                         # 'checks' is calculated by the template
+                     }
+                 )
+         self.__say(f"Writing {len(rows)} row(s) to Columns")
+         return self.add_rows("columns", rows)
+
+     def write_enumerations(self) -> List[int]:
+         rows = []
+         for table in self.dictionary:
+             for column in table:
+                 if not column.enumerations:
+                     continue
+                 for code, name in column.enumerations.items():
+                     rows.append(
+                         {
+                             "table": table.name,
+                             "column": column.name,
+                             "code": code,
+                             "name": name,
+                             # 'checks' is calculated by the template
+                         }
+                     )
+         self.__say(f"Writing {len(rows)} row(s) to Enumerations")
+         return self.add_rows("enumerations", rows)
+
+     # Low Helpers #
+     def _get_worksheet(self, sheet_name: str) -> Worksheet:
+         """Return a worksheet by name (case-insensitive) and cache in self.sheets."""
+         target = sheet_name.lower()
+         matches = [ws for ws in self.workbook.worksheets if ws.title.lower() == target]
+         if not matches:
+             raise DataDictionaryImportError(f"Required sheet not found: '{sheet_name}'")
+         if len(matches) > 1:
+             titles = ", ".join(ws.title for ws in matches)
+             raise DataDictionaryImportError(
+                 f"Duplicate sheets for '{sheet_name}' (case-insensitive): {titles}"
+             )
+         ws = matches[0]
+         self.sheets[target] = ws
+         return ws
+
+     def _table_name(self, table_name: ExcelTable) -> str:
+         """Return canonical table name from workbook."""
+         return (
+             getattr(table_name, "displayName", None)
+             or getattr(table_name, "name", None)
+             or ""
+         ).strip()
+
+     def _get_table(self, sheet_name: str, table_name: str) -> ExcelTable:
+         """Return a unique table and cache in self.tables."""
+         ws_expected = self._get_worksheet(sheet_name)
+         target = table_name.lower()
+
+         matches = []
+         for worksheet in self.workbook.worksheets:
+             for tbl in getattr(worksheet, "tables", {}).values():
+                 if self._table_name(tbl).lower() == target:
+                     matches.append((worksheet, tbl))
+
+         if not matches:
+             raise DataDictionaryImportError(f"Required table not found: '{table_name}'")
+         if len(matches) > 1:
+             locs = ", ".join(
+                 f"{self._table_name(table)}@{worksheet.title}"
+                 for worksheet, table in matches
+             )
+             raise DataDictionaryImportError(
+                 f"Table name '{table_name}' is not unique across workbook: {locs}"
+             )
+
+         ws_found, tbl = matches[0]
+         if ws_found is not ws_expected:
+             raise DataDictionaryImportError(
+                 f"Table '{self._table_name(tbl)}' is on sheet '{ws_found.title}', "
+                 f"expected on '{ws_expected.title}'"
+             )
+
+         self.tables[target] = tbl
+         return tbl
+
+     def _worksheet_of_table(self, tbl: ExcelTable) -> Worksheet:
+         """Find and return the worksheet that owns this table."""
+         for ws in self.workbook.worksheets:
+             for t in getattr(ws, "tables", {}).values():
+                 if t is tbl:
+                     return ws
+         raise DataDictionaryImportError("Table does not belong to any worksheet.")
+
+     def _validate_required_columns(self) -> None:
+         self.__say("Validating required columns [RC]:")
+
+         for sheet_key, required_cols in DD_COLUMN_MAP.items():
+             table_name = DD_TABLE_MAP.get(sheet_key)
+             if not table_name or required_cols is None:
+                 self.__say(f"[RC] {sheet_key}: no table requirements")
+                 continue
+
+             tkey = table_name.lower()
+             tbl = self.tables.get(tkey)
+             if tbl is None:
+                 raise DataDictionaryImportError(
+                     f"Required table '{table_name}' for sheet '{sheet_key}' not loaded."
+                 )
+
+             colmap = _table_column_index_map(tbl)
+             self.table_column_maps[tkey] = colmap
+
+             missing: list[str] = []
+             for rc in required_cols:
+                 rc_key = (rc or "").strip().lower()
+                 if rc_key not in colmap:
+                     missing.append(rc)
+
+             if missing:
+                 raise DataDictionaryImportError(
+                     f"Missing required columns in table '{table_name}' (sheet '{sheet_key}'): {', '.join(missing)}"
+                 )
+
+             self.__say(
+                 f"[RC] {sheet_key}.{table_name}: OK ({len(required_cols)}/{len(required_cols)} present)"
+             )
+
+         self.__say("Required columns validated")
+
+
+ def export_dictionary(
+     dictionary: Dictionary,
+     directory: Path | str,
+     filename: str | None = None,
+     overwrite: bool = False,
+     debug: bool = False,
+     _template_path: Path | str | None = None,
+ ) -> None:
+     """
+     Summary:
+         Export a data dictionary to an Excel file.
+
+     Arguments:
+         dictionary (Dictionary): data dictionary to export
+         directory (Path | str): directory to export to
+         filename (str | None): filename to export to (default is None)
+         overwrite (bool): whether to overwrite existing file (default is False)
+         debug (bool): whether to print debug information (default is False)
+         _template_path (Path | str | None): path to template data dictionary (default is None; changing not advised)
+
+     Raises:
+         DataDictionaryExportError: if unable to export data dictionary
+     """
+     exporter = Exporter(dictionary, debug=debug, template_path=_template_path)
+     exporter.write_details()
+     exporter.write_tables()
+     exporter.write_columns()
+     exporter.write_enumerations()
+     exporter.export(directory=directory, filename=filename, overwrite=overwrite)
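
A minimal end-to-end sketch of the Exporter workflow above, assuming the optional Dictionary fields (version_notes, inclusion/exclusion criteria) can be omitted; the table, dictionary, and file names here are illustrative only:

    from valediction.dictionary.exporting import Exporter
    from valediction.dictionary.model import Column, Dictionary, Table

    # Hypothetical one-table dictionary, mirroring the demo definitions earlier in this diff
    patients = Table(
        name="patients",
        columns=[
            Column(name="PATIENT_HASH", order=1, data_type="text", length=12, primary_key=1),
            Column(name="DATE_OF_BIRTH", order=2, data_type="date"),
        ],
        description="Minimal example table",
    )
    dictionary = Dictionary(name="Example", version="v0.1", tables=[patients])

    exporter = Exporter(dictionary, debug=True)  # loads the bundled .xltx template
    exporter.write_details()
    exporter.write_tables()
    exporter.write_columns()
    exporter.write_enumerations()
    exporter.export(directory="out", filename="EXAMPLE - Data Dictionary", overwrite=True)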