valediction 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. valediction/__init__.py +8 -0
  2. valediction/convenience.py +50 -0
  3. valediction/data_types/__init__.py +0 -0
  4. valediction/data_types/data_type_helpers.py +75 -0
  5. valediction/data_types/data_types.py +58 -0
  6. valediction/data_types/type_inference.py +541 -0
  7. valediction/datasets/__init__.py +0 -0
  8. valediction/datasets/datasets.py +870 -0
  9. valediction/datasets/datasets_helpers.py +46 -0
  10. valediction/demo/DEMO - Data Dictionary.xlsx +0 -0
  11. valediction/demo/DEMOGRAPHICS.csv +101 -0
  12. valediction/demo/DIAGNOSES.csv +650 -0
  13. valediction/demo/LAB_TESTS.csv +1001 -0
  14. valediction/demo/VITALS.csv +1001 -0
  15. valediction/demo/__init__.py +6 -0
  16. valediction/demo/demo_dictionary.py +129 -0
  17. valediction/dictionary/__init__.py +0 -0
  18. valediction/dictionary/exporting.py +501 -0
  19. valediction/dictionary/exporting_helpers.py +371 -0
  20. valediction/dictionary/generation.py +357 -0
  21. valediction/dictionary/helpers.py +174 -0
  22. valediction/dictionary/importing.py +494 -0
  23. valediction/dictionary/integrity.py +37 -0
  24. valediction/dictionary/model.py +582 -0
  25. valediction/dictionary/template/PROJECT - Data Dictionary.xltx +0 -0
  26. valediction/exceptions.py +22 -0
  27. valediction/integrity.py +97 -0
  28. valediction/io/__init__.py +0 -0
  29. valediction/io/csv_readers.py +307 -0
  30. valediction/progress.py +206 -0
  31. valediction/support.py +72 -0
  32. valediction/validation/__init__.py +0 -0
  33. valediction/validation/helpers.py +315 -0
  34. valediction/validation/issues.py +280 -0
  35. valediction/validation/validation.py +598 -0
  36. valediction-1.0.0.dist-info/METADATA +15 -0
  37. valediction-1.0.0.dist-info/RECORD +38 -0
  38. valediction-1.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,371 @@
1
+ from copy import copy
2
+ from dataclasses import dataclass
3
+ from typing import Any, Dict, Iterable, List, Mapping, Tuple
4
+
5
+ from openpyxl.formatting.formatting import (
6
+ ConditionalFormatting,
7
+ ConditionalFormattingList,
8
+ )
9
+ from openpyxl.utils import get_column_letter, range_boundaries
10
+ from openpyxl.worksheet.cell_range import MultiCellRange
11
+ from openpyxl.worksheet.table import Table as ExcelTable
12
+ from openpyxl.worksheet.worksheet import Worksheet
13
+
14
+ from valediction.exceptions import DataDictionaryError, DataDictionaryImportError
15
+
16
+
17
@dataclass
class CFRuleInfo:
    """Plain record describing one conditional-formatting rule and its target ranges.

    Instances are produced by ``_collect_conditional_formats``; every field
    other than ``ranges`` is copied from the rule object with ``getattr`` and
    is ``None`` when the rule does not expose that attribute.
    """

    # Target ranges rendered as strings, one entry per range.
    ranges: list[str]
    # Value of the rule's ``type`` attribute.
    type: str | None = None
    # Value of the rule's ``formula`` attribute.
    formula: list[str] | None = None
    # Value of the rule's ``operator`` attribute.
    operator: str | None = None
    # Value of the rule's ``dxfId`` attribute.
    dxfId: int | None = None
    # Value of the rule's ``priority`` attribute.
    priority: int | None = None
25
+
26
+
27
@dataclass
class DVRuleInfo:
    """Plain record describing one data-validation rule and its target ranges.

    Instances are produced by ``_collect_data_validations``; every field other
    than ``ranges`` is copied from the validation object with ``getattr`` and
    is ``None`` when the object does not expose that attribute.
    """

    # Target ranges rendered as strings (empty when the rule has no sqref).
    ranges: list[str]
    # Value of the validation's ``type`` attribute.
    type: str | None = None
    # Value of the validation's ``operator`` attribute.
    operator: str | None = None
    # Value of the validation's ``formula1`` attribute.
    formula1: str | None = None
    # Value of the validation's ``formula2`` attribute.
    formula2: str | None = None
    # Value of the validation's ``allowBlank`` attribute.
    allowBlank: bool | None = None
    # Value of the validation's ``showErrorMessage`` attribute.
    showErrorMessage: bool | None = None
    # Value of the validation's ``showInputMessage`` attribute.
    showInputMessage: bool | None = None
    # Value of the validation's ``promptTitle`` attribute.
    promptTitle: str | None = None
    # Value of the validation's ``prompt`` attribute.
    prompt: str | None = None
    # Value of the validation's ``errorTitle`` attribute.
    errorTitle: str | None = None
    # Value of the validation's ``error`` attribute.
    error: str | None = None
41
+
42
+
43
@dataclass
class CalculatedColInfo:
    """Plain record describing a calculated (formula) column.

    NOTE(review): nothing in the visible part of this module constructs this
    class, so the field meanings below are inferred from the names only and
    should be confirmed against the caller.
    """

    # Presumably the column header text.
    column_name: str
    # Presumably the column position; whether it is table- or sheet-relative is not visible here.
    column_index: int
    # Presumably the A1 coordinate of the header cell.
    header_cell: str
    # Presumably one example of the column's formula text.
    formula_sample: str
    # Presumably the fraction of rows carrying the formula; scale (0-1 vs 0-100) unconfirmed.
    coverage: float
50
+
51
+
52
def _collect_conditional_formats(worksheet: Worksheet) -> Iterable[CFRuleInfo]:
    """Yield one CFRuleInfo per conditional-formatting rule on a worksheet.

    The rules are read from ``worksheet.conditional_formatting``. The private
    ``_cf_rules`` dict is preferred; if it is absent, the public
    ``cf_rules``/``ranges`` pair is used instead. Nothing is yielded when the
    worksheet has no conditional formatting or neither mapping is available.

    Mapping keys are normalised to a list of range strings using the same
    rules as ``_key_ranges``: an object with ``sqref.ranges``, an object with
    ``ranges``, a list/tuple of ranges, or anything else via ``str``. Without
    the first two cases a ``ConditionalFormatting`` key would be rendered via
    its repr-style text rather than as A1 ranges.
    """
    cf = getattr(worksheet, "conditional_formatting", None)
    if cf is None:
        return

    items = None
    # Preferred: internal mapping {key: [Rule, ...]}
    if hasattr(cf, "_cf_rules") and isinstance(cf._cf_rules, dict):
        items = list(cf._cf_rules.items())
    # Fallback for versions exposing a public accessor instead
    elif hasattr(cf, "cf_rules") and hasattr(cf, "ranges"):
        items = [(rng, cf.cf_rules(rng)) for rng in cf.ranges]

    if not items:
        return

    for sqref, rules in items:
        # Normalise the key to list[str], unwrapping range containers first so
        # that the individual ranges (not the container's repr) are reported.
        if hasattr(sqref, "sqref") and hasattr(sqref.sqref, "ranges"):
            ranges = [str(r) for r in sqref.sqref.ranges]
        elif hasattr(sqref, "ranges"):
            ranges = [str(r) for r in sqref.ranges]
        elif isinstance(sqref, (list, tuple)):
            ranges = [str(r) for r in sqref]
        else:
            ranges = [str(sqref)]

        for rule in rules:
            yield CFRuleInfo(
                ranges=ranges,
                type=getattr(rule, "type", None),
                formula=getattr(rule, "formula", None),
                operator=getattr(rule, "operator", None),
                dxfId=getattr(rule, "dxfId", None),
                priority=getattr(rule, "priority", None),
            )
88
+
89
+
90
def _collect_data_validations(worksheet: Worksheet) -> Iterable[DVRuleInfo]:
    """Yield one DVRuleInfo per data-validation entry found on a worksheet.

    Entries are read from ``worksheet.data_validations.dataValidation``;
    nothing is yielded when either attribute is missing or empty. Each
    entry's target (``sqref``, falling back to ``ranges``) is rendered as a
    list of strings, and the remaining fields are copied with ``getattr``,
    defaulting to ``None``.
    """
    container = getattr(worksheet, "data_validations", None)
    validations = (
        None if container is None else getattr(container, "dataValidation", None)
    )
    if not validations:
        return

    copied_fields = (
        "type",
        "operator",
        "formula1",
        "formula2",
        "allowBlank",
        "showErrorMessage",
        "showInputMessage",
        "promptTitle",
        "prompt",
        "errorTitle",
        "error",
    )

    for validation in validations:
        target = getattr(validation, "sqref", None) or getattr(
            validation, "ranges", None
        )
        if target is None:
            cell_ranges: list[str] = []
        else:
            try:
                # Range container: render each contained range separately
                cell_ranges = [str(item) for item in target.ranges]
            except Exception:
                # Anything else: fall back to its string form as a single entry
                cell_ranges = [str(target)]

        yield DVRuleInfo(
            ranges=cell_ranges,
            **{name: getattr(validation, name, None) for name in copied_fields},
        )
128
+
129
+
130
def _collect_table_formulas(table: ExcelTable) -> dict[str, str]:
    """Map lower-cased column names to their calculated-column formula text.

    Columns without a (stripped) name or without a ``calculatedColumnFormula``
    are skipped. The formula text is taken from ``attr_text``, then ``text``,
    then ``str(formula)``; a leading ``=`` is added when the text is non-empty
    and does not already start with one.
    """
    formulas_by_header: dict[str, str] = {}
    for col in getattr(table, "tableColumns", []):
        header = (getattr(col, "name", "") or "").strip()
        if not header:
            continue

        calc = getattr(col, "calculatedColumnFormula", None)
        if not calc:
            continue

        # First truthy source wins; str(calc) is the last resort
        for attr in ("attr_text", "text"):
            expression = getattr(calc, attr, None)
            if expression:
                break
        else:
            expression = str(calc)

        rendered = str(expression)
        if expression and not rendered.startswith("="):
            rendered = f"={expression}"
        formulas_by_header[header.lower()] = rendered
    return formulas_by_header
148
+
149
+
150
def _table_column_index_map(tbl: ExcelTable) -> dict[str, int]:
    """Map lower-cased column names to their 1-based position within the table.

    Columns whose stripped name is empty are skipped but still consume a
    position. Returns an empty dict when the table exposes no columns.

    Raises:
        DataDictionaryImportError: two columns share a name when compared
            case-insensitively.
    """
    columns = getattr(tbl, "tableColumns", None)
    if not columns:
        return {}

    positions: dict[str, int] = {}
    position = 0
    for column in list(columns):
        position += 1
        raw = (getattr(column, "name", "") or "").strip()
        if raw:
            folded = raw.lower()
            if folded in positions:
                raise DataDictionaryImportError(
                    f"Duplicate column name (case-insensitive) in table '{getattr(tbl, 'displayName', getattr(tbl, 'name', ''))}': '{raw}'"
                )
            positions[folded] = position
    return positions
167
+
168
+
169
def _cf_entries(cf: ConditionalFormattingList) -> list[tuple[object, list]]:
    """List the ``(key, rules)`` pairs held in ``cf._cf_rules``.

    An empty list is returned when the attribute is missing or is not a dict.
    """
    rule_map = getattr(cf, "_cf_rules", None)
    return list(rule_map.items()) if isinstance(rule_map, dict) else []
174
+
175
+
176
def _key_ranges(key: ConditionalFormatting | MultiCellRange) -> list[str]:
    """Render a conditional-formatting mapping key as a list of range strings.

    Looked up in order: ``key.sqref.ranges``, ``key.ranges``, the items of a
    list/tuple key, and finally ``str(key)`` as a single entry.
    """
    # A missing ``sqref`` yields None, which has no ``ranges`` and falls through to the key
    for holder in (getattr(key, "sqref", None), key):
        if hasattr(holder, "ranges"):
            return [str(item) for item in holder.ranges]

    parts = key if isinstance(key, (list, tuple)) else [key]
    return [str(part) for part in parts]
185
+
186
+
187
def _extend_ranges(
    range_strs: List[str], t_min_c: int, t_max_c: int, old_bottom: int, new_row: int
) -> Tuple[List[str], bool]:
    """Compute new sqref ranges; return (ranges, changed?).

    For each range string:

    * a string ``range_boundaries`` rejects is kept unchanged;
    * an open-ended range (whole columns or whole rows, reported with ``None``
      bounds) is kept unchanged instead of failing on the comparisons below;
    * a range that already contains ``new_row`` is kept unchanged;
    * a range whose last row equals ``old_bottom`` and whose columns overlap
      ``t_min_c..t_max_c`` has its last row moved to ``new_row``.

    Ranges that pass all the checks are re-rendered in ``A1:B2`` form whether
    or not they were extended. ``changed`` is True when at least one range was
    extended.
    """
    new_ranges: List[str] = []
    changed = False
    for rng in range_strs:
        try:
            c1, r1, c2, r2 = range_boundaries(rng)
        except ValueError:
            new_ranges.append(rng)
            continue
        if None in (c1, r1, c2, r2):
            # Whole-column / whole-row reference: nothing to extend, and the
            # integer comparisons below would raise TypeError on None.
            new_ranges.append(rng)
            continue
        if r1 <= new_row <= r2:
            new_ranges.append(rng)
            continue
        overlaps_cols = not (c2 < t_min_c or c1 > t_max_c)
        if r2 == old_bottom and overlaps_cols:
            r2 = new_row
            changed = True
        new_ranges.append(f"{get_column_letter(c1)}{r1}:{get_column_letter(c2)}{r2}")
    return new_ranges, changed
208
+
209
+
210
def _extend_cf_for_new_row(
    ws, table_cols: tuple[int, int], old_bottom: int, new_row: int
) -> None:
    """Stretch conditional-formatting ranges so they also cover ``new_row``.

    Every entry of ``ws.conditional_formatting`` whose ranges are changed by
    ``_extend_ranges`` (last row equal to ``old_bottom`` and columns
    overlapping ``table_cols``) is removed from the internal mapping and its
    rules are re-added, as copies, under the extended ranges. Does nothing
    when the worksheet has no conditional formatting or the internal mapping
    is unavailable.

    Args:
        ws: worksheet whose conditional formatting is updated in place.
        table_cols: ``(first, last)`` column numbers spanned by the table.
        old_bottom: row number of the table's last row before the insertion.
        new_row: row number of the newly added row.
    """
    cf = getattr(ws, "conditional_formatting", None)
    if cf is None:
        return

    t_min_c, t_max_c = table_cols
    entries = _cf_entries(cf)
    if not entries:
        return

    for key, rules in entries:
        range_strs = _key_ranges(key)
        new_ranges, changed = _extend_ranges(
            range_strs, t_min_c, t_max_c, old_bottom, new_row
        )
        if not changed:
            continue
        # replace mapping (do NOT mutate key)
        cf._cf_rules.pop(key, None)
        # sqref strings are whitespace-separated; a comma-joined string is
        # not split into ranges and fails to parse when there is more than one.
        sqref = " ".join(new_ranges)
        for rule in rules:
            cf.add(sqref, copy(rule))
234
+
235
+
236
def _extend_dv_for_new_row(ws, table_cols: tuple[int, int], new_row: int) -> None:
    """Add the ``new_row`` cells to every data validation overlapping the table.

    For each validation on ``ws``, every one of its ranges that does not
    already contain ``new_row`` and shares columns with ``table_cols``
    contributes a one-row segment (the shared columns at ``new_row``), which
    is then added to that validation. Validations without a target are
    skipped; a target whose ``ranges`` cannot be listed contributes nothing.
    """
    first_col, last_col = table_cols

    holder = getattr(ws, "data_validations", None)
    if not holder:
        return
    validations = getattr(holder, "dataValidation", None)
    if not validations:
        return

    for validation in validations:
        target = getattr(validation, "sqref", None) or getattr(
            validation, "ranges", None
        )
        if not target:
            continue

        try:
            cell_ranges = list(target.ranges)
        except Exception:
            cell_ranges = []

        # Collect first, add afterwards, so the validation is not modified
        # while its ranges are being examined.
        pending: list[str] = []
        for cell_range in cell_ranges:
            left, top, right, bottom = range_boundaries(str(cell_range))
            if top <= new_row <= bottom:
                # this segment already covers the new row
                continue
            shared_left = max(left, first_col)
            shared_right = min(right, last_col)
            if shared_left <= shared_right:
                pending.append(
                    f"{get_column_letter(shared_left)}{new_row}:{get_column_letter(shared_right)}{new_row}"
                )

        for segment in pending:
            validation.add(segment)
274
+
275
+
276
def _first_blank_data_row(
    ws: Worksheet, tbl: ExcelTable, colmap: dict[str, int], formulas: dict[str, str]
) -> int | None:
    """Return the first table data row that is blank across all *non-formula*
    columns.

    The returned value is the worksheet row number exactly as passed to
    ``ws.cell`` (the row directly below the table header is ``min_row + 1``),
    not a 0-based offset. A cell counts as blank when its value is ``None``
    or a string containing only whitespace.

    Args:
        ws: worksheet holding the table.
        tbl: table whose ``ref`` defines the header row and data rows.
        colmap: lower-cased header -> 1-based table column position.
        formulas: lower-cased headers of calculated columns; these columns are
            ignored when deciding whether a row is blank.

    Returns:
        The row number, or ``None`` if the table has no data rows or none of
        them is blank.
    """
    min_c, min_r, max_c, max_r = range_boundaries(tbl.ref)
    data_start = min_r + 1
    if data_start > max_r:
        return None

    # quick reverse map: table-index(1..n) -> header_lower
    idx_to_header = {v: k for k, v in colmap.items()}
    ncols = max_c - min_c + 1

    # Worksheet columns to inspect. This does not depend on the row, so it is
    # computed once instead of once per data row.
    checked_cols = [
        min_c + j_idx - 1
        for j_idx in range(1, ncols + 1)
        if idx_to_header.get(j_idx, "") not in formulas
    ]

    for r in range(data_start, max_r + 1):
        all_blank = True
        for c in checked_cols:
            val = ws.cell(r, c).value
            if val is None:
                continue
            if isinstance(val, str) and val.strip() == "":
                continue
            # any non-empty value in a non-formula column -> row not blank
            all_blank = False
            break
        if all_blank:
            return r
    return None
312
+
313
+
314
def _build_row_list_from_mapping(
    tbl: ExcelTable,
    colmap: Dict[str, int],
    formulas: Dict[str, str],
    mapping: Mapping[str, Any],
) -> List[Any]:
    """Lay out a ``{header: value}`` mapping as a list spanning the table width.

    Mapping keys are matched to table headers after ``str()``, stripping and
    lower-casing. Positions with no known header, positions belonging to a
    formula column, and headers absent from ``mapping`` are left as ``None``.
    """
    left_col, _top, right_col, _bottom = range_boundaries(tbl.ref)
    width = right_col - left_col + 1

    # Normalise the caller's keys once so lookups below are case-insensitive
    lookup = {str(key).strip().lower(): value for key, value in mapping.items()}
    # Table position (1..n) -> lower-cased header
    header_at = {position: header for header, position in colmap.items()}

    cells: List[Any] = []
    for position in range(1, width + 1):
        header = header_at.get(position, "")
        usable = bool(header) and header not in formulas and header in lookup
        cells.append(lookup[header] if usable else None)
    return cells
345
+
346
+
347
def _norm_label(s: object) -> str:
    """Reduce a cell label to a canonical form for case-insensitive matching.

    ``None`` becomes ``""``. Otherwise the value is converted with ``str()``,
    every run of whitespace (including line breaks) is collapsed to a single
    space, one trailing colon is dropped, and the result is case-folded.
    """
    if s is None:
        return ""
    collapsed = " ".join(str(s).split())
    # Drop at most one trailing colon, plus any space that preceded it
    without_colon = collapsed.removesuffix(":").rstrip()
    return without_colon.casefold()
356
+
357
+
358
def _find_label_cell(ws: Worksheet, label: str) -> tuple[int, int]:
    """Locate the first cell whose normalised text equals the normalised `label`.

    Both sides are compared after ``_norm_label``. The search walks the
    worksheet's ``min_row..max_row`` by ``min_column..max_column`` rectangle
    row by row, left to right, and returns ``(row, col)`` of the first match.

    Raises:
        DataDictionaryError: no cell in that rectangle matches.
    """
    wanted = _norm_label(label)
    row_span = range(ws.min_row, ws.max_row + 1)
    col_span = range(ws.min_column, ws.max_column + 1)

    for row_idx in row_span:
        for col_idx in col_span:
            candidate = ws.cell(row_idx, col_idx).value
            if _norm_label(candidate) == wanted:
                return row_idx, col_idx

    raise DataDictionaryError(f"Details label not found: {label!r}")