valediction-1.1.0-py3-none-any.whl → valediction-1.5.0-py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only.
- valediction/data_types/data_type_helpers.py +2 -2
- valediction/data_types/data_types.py +6 -6
- valediction/data_types/type_inference.py +25 -13
- valediction/datasets/datasets.py +12 -12
- valediction/demo/DEMO - Data Dictionary.xlsx +0 -0
- valediction/demo/demo_dictionary.py +1 -1
- valediction/dictionary/generation.py +6 -6
- valediction/dictionary/helpers.py +1 -8
- valediction/dictionary/importing.py +44 -21
- valediction/dictionary/model.py +108 -36
- valediction/dictionary/template/PROJECT - Data Dictionary.xltx +0 -0
- valediction/integrity.py +80 -24
- valediction/io/csv_readers.py +3 -3
- valediction/support.py +5 -1
- valediction/validation/helpers.py +91 -35
- valediction/validation/issues.py +38 -25
- valediction/validation/validation.py +151 -110
- {valediction-1.1.0.dist-info → valediction-1.5.0.dist-info}/METADATA +1 -1
- valediction-1.5.0.dist-info/RECORD +38 -0
- valediction-1.1.0.dist-info/RECORD +0 -38
- {valediction-1.1.0.dist-info → valediction-1.5.0.dist-info}/WHEEL +0 -0
valediction/data_types/data_type_helpers.py
CHANGED
@@ -62,14 +62,14 @@ def infer_datetime_format(
 
 
 def get_date_type(datetime_format: str) -> DataType | None:
-    """Identifies if a datetime format string corresponds to a Date or
+    """Identifies if a datetime format string corresponds to a Date or Timestamp data
     type.
 
     Args:
         datetime_format (str): datetime format string
 
     Returns:
-        DataType | None: DataType of Date,
+        DataType | None: DataType of Date, Timestamp, or None if not found.
     """
     config = get_config()
     return config.date_formats.get(datetime_format)
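Note: the corrected docstring matches the config-driven lookup in the function body. A minimal sketch of that lookup, with stand-in format entries (the real contents of config.date_formats are not shown in this diff):

```python
from enum import Enum

class DataType(Enum):
    DATE = "Date"
    TIMESTAMP = "Timestamp"

# hypothetical stand-in for the package's config.date_formats mapping
date_formats = {
    "%Y-%m-%d": DataType.DATE,
    "%Y-%m-%d %H:%M:%S": DataType.TIMESTAMP,
}

def get_date_type(datetime_format: str) -> DataType | None:
    # unknown formats fall through to None, matching the docstring above
    return date_formats.get(datetime_format)

assert get_date_type("%Y-%m-%d") is DataType.DATE
assert get_date_type("%d/%m/%Y") is None  # not in the mapping
```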
valediction/data_types/data_types.py
CHANGED
@@ -8,7 +8,7 @@ class DataType(Enum):
     INTEGER = "Integer"
     FLOAT = "Float"
     DATE = "Date"
-
+    TIMESTAMP = "Timestamp"
     FILE = "File"
 
     def __str__(self) -> str:
@@ -32,9 +32,9 @@ class DataType(Enum):
             "number": cls.FLOAT,
             "numeric": cls.FLOAT,
             "date": cls.DATE,
-            "datetime": cls.
-            "datetime64": cls.
-            "timestamp": cls.
+            "datetime": cls.TIMESTAMP,
+            "datetime64": cls.TIMESTAMP,
+            "timestamp": cls.TIMESTAMP,
             "file": cls.FILE,
             "blob": cls.FILE,
             "binary": cls.FILE,
@@ -49,10 +49,10 @@ class DataType(Enum):
         return self in {DataType.TEXT}
 
     def valid_for_primary_key(self) -> bool:
-        """PKs can only be Text, Integer, Date,
+        """PKs can only be Text, Integer, Date, Timestamp."""
         return self in {
             DataType.TEXT,
             DataType.INTEGER,
             DataType.DATE,
-            DataType.
+            DataType.TIMESTAMP,
         }
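Note: the three rewritten aliases make every datetime-flavoured spelling resolve to the new TIMESTAMP member. A small sketch of the alias lookup, with a hypothetical resolve() wrapper (the enclosing classmethod's name is not visible in this diff):

```python
from valediction.data_types.data_types import DataType

# the alias table extended above; the classmethod that wraps it is not shown
aliases = {
    "date": DataType.DATE,
    "datetime": DataType.TIMESTAMP,
    "datetime64": DataType.TIMESTAMP,
    "timestamp": DataType.TIMESTAMP,
}

def resolve(type_name: str) -> DataType | None:  # hypothetical wrapper
    return aliases.get(type_name.strip().lower())

assert resolve("Datetime64") is DataType.TIMESTAMP
assert resolve(" timestamp ") is DataType.TIMESTAMP
```

This reading is consistent with demo_dictionary.py below, which now passes data_type="timestamp" as a plain string.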
valediction/data_types/type_inference.py
CHANGED
@@ -4,6 +4,7 @@ import re
 import warnings
 
 import pandas as pd
+from pandas.api.types import is_object_dtype, is_string_dtype
 
 from valediction.data_types.data_type_helpers import infer_datetime_format
 from valediction.data_types.data_types import DataType
@@ -53,8 +54,8 @@ class ColumnState:
             return DataType.FLOAT, None
         if self.data_type == DataType.DATE:
             return DataType.DATE, None
-        if self.data_type == DataType.
-            return DataType.
+        if self.data_type == DataType.TIMESTAMP:
+            return DataType.TIMESTAMP, None
 
         return DataType.TEXT, _len1()
 
@@ -123,7 +124,7 @@ class TypeInferer:
         _handling_function: callable = {
             DataType.TEXT: self._handle_state_text,
             DataType.DATE: self._handle_state_date,
-            DataType.
+            DataType.TIMESTAMP: self._handle_state_datetime,
             DataType.INTEGER: self._handle_state_integer,
             DataType.FLOAT: self._handle_state_float,
         }.get(state.data_type, self._handle_state_text)
@@ -141,20 +142,31 @@ class TypeInferer:
         self, s: pd.Series
     ) -> tuple[pd.Series, pd.Series, pd.Series, int | None]:
         self.__begin_step(step="Trimming whitespace")
-
+        is_text = is_string_dtype(s) or is_object_dtype(s)
+
+        if is_text:
+            trimmed = s.astype("string").str.strip()
+        else:
+            trimmed = s
         self.__complete_step()
 
         self.__begin_step(step="Checking nulls")
-
+        if is_text:
+            nulls = trimmed.isna() | trimmed.str.lower().isin(self.null_tokens)
+        else:
+            nulls = trimmed.isna()
         self.__complete_step()
 
         self.__begin_step(step="Checking max length")
-
-
+        if is_text:
+            lengths = trimmed.str.len()
+            max_len = int(lengths.max(skipna=True)) if lengths.notna().any() else None
+        else:
+            max_len = None
         self.__complete_step()
 
         self.__begin_step(step="Setting non-null mask")
-        nonnull_mask = (~nulls) &
+        nonnull_mask = (~nulls) & trimmed.notna()
        self.__complete_step()
 
         return trimmed, nulls, nonnull_mask, max_len
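Note: the rewritten preprocessing guards every .str operation behind a dtype check, so numeric and datetime columns no longer hit string accessors. A standalone sketch of the same steps, with an assumed null_tokens value (neither the method's name nor TypeInferer's actual tokens are shown here):

```python
import pandas as pd
from pandas.api.types import is_object_dtype, is_string_dtype

null_tokens = {"null", "na", "n/a", ""}  # assumption

def prepare(s: pd.Series):
    is_text = is_string_dtype(s) or is_object_dtype(s)
    trimmed = s.astype("string").str.strip() if is_text else s
    if is_text:
        nulls = trimmed.isna() | trimmed.str.lower().isin(null_tokens)
        lengths = trimmed.str.len()
        max_len = int(lengths.max(skipna=True)) if lengths.notna().any() else None
    else:
        nulls, max_len = trimmed.isna(), None
    return trimmed, nulls, (~nulls) & trimmed.notna(), max_len

trimmed, nulls, mask, max_len = prepare(pd.Series(["  a  ", "NA", None]))
# trimmed -> ["a", "NA", <NA>]; nulls -> [False, True, True]; max_len -> 2
```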
@@ -193,7 +205,7 @@ class TypeInferer:
         if ok.all():
             self._transition(
                 st,
-                DataType.
+                DataType.TIMESTAMP if has_time.any() else DataType.DATE,
                 f"cached datetime format={st.cached_datetime_format!r}",
             )
         self.__complete_step()
@@ -210,7 +222,7 @@
         st.cached_datetime_format = fmt
         self._transition(
             st,
-            DataType.
+            DataType.TIMESTAMP if has_time.any() else DataType.DATE,
             f"explicit datetime format={fmt!r}",
         )
         self.__complete_step()
@@ -276,7 +288,7 @@
             st.lock_text_permanent = True
             self._transition(st, DataType.TEXT, "datetime parse failures")
         elif has_time.any():
-            self._transition(st, DataType.
+            self._transition(st, DataType.TIMESTAMP, "time component detected")
 
         self.__complete_step()
 
@@ -334,7 +346,7 @@
         if ok.all():
             self._transition(
                 st,
-                DataType.
+                DataType.TIMESTAMP if has_time.any() else DataType.DATE,
                 f"cached datetime format={st.cached_datetime_format!r}",
             )
             return True
@@ -377,7 +389,7 @@
         if ok.all():
             self._transition(
                 st,
-                DataType.
+                DataType.TIMESTAMP if has_time.any() else DataType.DATE,
                 f"explicit datetime format={st.cached_datetime_format!r}",
             )
             return True
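Note: all five transitions above apply the same promotion rule: a column becomes TIMESTAMP as soon as any parsed value carries a time component, otherwise DATE. A sketch of that rule; how has_time is actually derived is not shown in this diff:

```python
import pandas as pd
from valediction.data_types.data_types import DataType

def promoted_type(parsed: pd.Series) -> DataType:
    has_time = (
        (parsed.dt.hour != 0) | (parsed.dt.minute != 0) | (parsed.dt.second != 0)
    )  # assumption: the real has_time derivation is not visible here
    return DataType.TIMESTAMP if has_time.any() else DataType.DATE

dates = pd.to_datetime(pd.Series(["2024-01-01", "2024-01-02"]))
stamps = pd.to_datetime(pd.Series(["2024-01-01 08:30:00", "2024-01-02"]))
assert promoted_type(dates) is DataType.DATE
assert promoted_type(stamps) is DataType.TIMESTAMP
```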
valediction/datasets/datasets.py
CHANGED
@@ -20,7 +20,8 @@ from valediction.io.csv_readers import (
 )
 from valediction.support import (
     _get_runtime_string,
-
+    _normalise,
+    _strip,
     list_as_bullets,
     print_bold_red,
     print_red,
@@ -437,16 +438,16 @@ class Dataset(list[DatasetItem]):
 
     # Getters
     def get(self, name: str, default: DatasetItem | None = None) -> DatasetItem | None:
-        name_key = name
+        name_key = _normalise(name)
         for item in self:
-            if item.name
+            if _normalise(item.name) == name_key:
                 return item
         return default
 
     def index_of(self, name: str) -> int | None:
-        name_key = name
+        name_key = _normalise(name)
         for i, item in enumerate(self):
-            if item.name == name_key:
+            if _normalise(item.name) == name_key:
                 return i
         return None
 
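Note: these getters, and the importing.py changes further down, route comparisons through _normalise and stored names through _strip, both newly imported from valediction.support (whose +5 -1 change has no hunk in this diff). A plausible reading of the two helpers; their real bodies are an assumption here:

```python
def _strip(name: str) -> str:
    # assumed: trim surrounding whitespace but keep the original case
    return name.strip()

def _normalise(name: str) -> str:
    # assumed: produce a case-insensitive comparison key
    return name.strip().casefold()

# With get()/index_of() comparing _normalise(item.name) == _normalise(name),
# dataset.get("patients") now matches an item stored as "Patients".
```

The removal of _normalise_name (name.upper().strip()) and of the must-be-uppercase check in dictionary/helpers.py below supports this reading: names now keep their case and compare case-insensitively.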
@@ -796,20 +797,21 @@ class Dataset(list[DatasetItem]):
         name: str | None,
         data: DataLike,
     ) -> DatasetItem:
-        """Normalise a (
+        """Normalise a (name, data) double into a DatasetItem."""
         if isinstance(data, (str, Path)):
             path = Path(data)
             if not path.exists():
                 raise FileNotFoundError(f"File not found: {path}")
             if path.suffix.lower() != ".csv":
                 raise ValueError(f"Only .csv supported right now, got: {path}")
-            resolved_name =
+            resolved_name = _strip(name or path.stem)
             return DatasetItem(name=resolved_name, data=path.resolve())
 
         if isinstance(data, DataFrame):
             if not name:
                 raise ValueError("When providing a DataFrame, 'name' is required.")
-            resolved_name =
+            resolved_name = _strip(name)
+            data.columns = [_strip(column) for column in data.columns]
             return DatasetItem(name=resolved_name, data=data)
 
         raise TypeError("data must be a Path/str to .csv or a pandas DataFrame.")
@@ -823,13 +825,11 @@
         if p.is_file():
             if p.suffix.lower() != ".csv":
                 raise ValueError(f"Expected a .csv file, got: {p.suffix} ({p})")
-            return [DatasetItem(name=
+            return [DatasetItem(name=_strip(p.stem), data=p.resolve())]
 
         if p.is_dir():
             return [
-                DatasetItem(
-                    name=_normalise_name(csv_path.stem), data=csv_path.resolve()
-                )
+                DatasetItem(name=_strip(csv_path.stem), data=csv_path.resolve())
                 for csv_path in p.glob("*.csv")
             ]
 
valediction/demo/DEMO - Data Dictionary.xlsx
CHANGED
Binary file
valediction/demo/demo_dictionary.py
CHANGED
@@ -103,7 +103,7 @@ def demo_dictionary() -> Dictionary:
                 foreign_key="DEMOGRAPHICS.PATIENT_HASH",
             ),
             Column(
-                name="OBSERVATION_TIME", order=2, data_type="
+                name="OBSERVATION_TIME", order=2, data_type="timestamp", primary_key=2
             ),
             Column(
                 name="OBSERVATION_TYPE",
valediction/dictionary/generation.py
CHANGED
@@ -24,7 +24,7 @@ from valediction.io.csv_readers import (
     read_csv_sample,
 )
 from valediction.progress import Progress
-from valediction.support import
+from valediction.support import _strip, calculate_runtime
 
 IMPORTING_DATA = "Importing data"
 CHUNK_STEPS = 1
@@ -124,7 +124,7 @@ class Generator:
         self.__say(f"Generating dictionary for {len(items)} tables")
         for item in items:
             self.__progress_init(item)
-            table = Table(name=
+            table = Table(name=_strip(item.name))
             dictionary.add_table(table)
 
             if item.is_path:
@@ -192,7 +192,7 @@ class Generator:
             col_state = inferer.states[col_name]
             data_type, length = col_state.final_data_type_and_length()
             col = Column(
-                name=
+                name=_strip(col_name),
                 order=idx,
                 data_type=data_type,
                 length=length if data_type == DataType.TEXT else None,
@@ -242,7 +242,7 @@ class Generator:
             col_state = inferer.states[col_name]
             data_type, length = col_state.final_data_type_and_length()
             col = Column(
-                name=
+                name=_strip(col_name),
                 order=idx,
                 data_type=data_type,
                 length=length if data_type == DataType.TEXT else None,
@@ -257,7 +257,7 @@ class Generator:
         table.add_column(col)
 
     def _set_datetime_format(self, column_state: ColumnState, column: Column) -> None:
-        if column.data_type in (DataType.DATE, DataType.
+        if column.data_type in (DataType.DATE, DataType.TIMESTAMP):
             datetime_format = getattr(column_state, "cached_datetime_format", None)
             if datetime_format and hasattr(column, "datetime_format"):
                 column.datetime_format = datetime_format
@@ -277,7 +277,7 @@ class Generator:
         next_order = max((c.order or 0 for c in table), default=0) + 1
         data_type, length = col_state.final_data_type_and_length()
         new_col = Column(
-            name=
+            name=_strip(col_name),
             order=next_order,
             data_type=data_type,
             length=length if data_type == DataType.TEXT else None,
valediction/dictionary/helpers.py
CHANGED
@@ -26,9 +26,6 @@ def _check_name(name: str, entity: Literal["table", "column"]) -> list[str]:
         else config.max_column_name_length
     )
 
-    if name != name.upper():  # name must be uppercase
-        errors.append("must be uppercase")
-
     if invalid_chars.search(name):  # check invalid characters
         bad = set(invalid_chars.findall(name))
         errors.append(
@@ -109,16 +106,12 @@ def _check_primary_key(primary_key: int | None, data_type: DataType) -> list[str]:
     ):
         errors.append(
             f"invalid data type '{data_type.value}' for primary key column; "
-            "primary keys must be Text, Integer, Date, or
+            "primary keys must be Text, Integer, Date, or Timestamp"
         )
 
     return errors
 
 
-def _normalise_name(name: str) -> str:
-    return name.upper().strip()
-
-
 def _norm_header_map(columns: list) -> dict:
     mapping, _ = {}, set()
     for c in columns:
valediction/dictionary/importing.py
CHANGED
@@ -11,7 +11,6 @@ from valediction.dictionary.helpers import (
     _get_required_header,
     _is_missing,
     _norm_header_map,
-    _normalise_name,
     _parse_int,
     _parse_truthy,
     _row_is_blank,
@@ -19,7 +18,7 @@ from valediction.dictionary.helpers import (
 from valediction.dictionary.integrity import REQUIRED_SHEETS
 from valediction.dictionary.model import Column, Dictionary, Table
 from valediction.exceptions import DataDictionaryError, DataDictionaryImportError
-from valediction.support import list_as_bullets
+from valediction.support import _normalise, _strip, list_as_bullets
 
 
 @dataclass
@@ -80,6 +79,13 @@ class ExcelDataDictionary:
             raise error
 
     # Import & Helpers
+    def _resolve_table_name(self, name: str) -> str | None:
+        """Return the canonical table name as it appears in Tables sheet (or None)."""
+        target = _normalise(name)
+        return next(
+            (t for t in self.table_metadata.keys() if _normalise(t) == target), None
+        )
+
     def _open_workbook(self) -> None:
         if not self.path.exists():
             raise DataDictionaryImportError(f"File not found: {self.path}")
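Note: the new _resolve_table_name helper canonicalises a Columns-sheet reference to the spelling used in the Tables sheet via case-insensitive comparison. A standalone restatement of its lookup, assuming the _normalise sketch above:

```python
def _normalise(name: str) -> str:
    return name.strip().casefold()  # assumed body, as sketched earlier

def resolve(table_metadata: dict[str, str | None], name: str) -> str | None:
    # mirrors _resolve_table_name: first Tables-sheet key whose normalised
    # form matches, else None
    target = _normalise(name)
    return next((t for t in table_metadata if _normalise(t) == target), None)

meta = {"Patients": "Patient demographics"}
assert resolve(meta, "PATIENTS") == "Patients"
assert resolve(meta, "Visits") is None
```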
@@ -140,20 +146,27 @@ class ExcelDataDictionary:
         description_col_header = _get_required_header(header_map, "description")
 
         meta: dict[str, str | None] = {}
+        seen: set[str] = set()
+
         for _, row in tables_df.iterrows():
             if _is_missing(row[table_col_header]):
                 continue
-
+
+            table_name = _strip(str(row[table_col_header]))
             table_description = (
                 None
                 if _is_missing(row[description_col_header])
                 else str(row[description_col_header])
             )
-
+
+            key = _normalise(table_name)
+            if key in seen:
                 raise DataDictionaryImportError(
                     f"Duplicate table '{table_name}' in Tables sheet."
                 )
+            seen.add(key)
             meta[table_name] = table_description
+
         if not meta:
             raise DataDictionaryImportError(
                 "Data Dictionary sheet 'Tables' contains no table rows."
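Note: duplicate detection now keys on the normalised name, so 'Patients' and 'PATIENTS' collide just like an exact repeat. A minimal illustration, with _normalise approximated as strip + casefold (an assumption):

```python
seen: set[str] = set()
for table_name in ["Patients", "PATIENTS"]:
    key = table_name.strip().casefold()  # stand-in for _normalise
    if key in seen:
        # the importer raises DataDictionaryImportError at this point
        print(f"Duplicate table '{table_name}' in Tables sheet.")
    seen.add(key)
# the second row collides: both names normalise to "patients"
```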
@@ -177,12 +190,13 @@ class ExcelDataDictionary:
                 or _is_missing(row[code_col_header])
             ):
                 continue
-            table_name =
-            column_name =
-
-
-
-            ]
+            table_name = _strip(str(row[table_col_header]))
+            column_name = _strip(str(row[column_col_header]))
+            resolved_table = self._resolve_table_name(table_name) or table_name
+            enum_key = (_normalise(resolved_table), _normalise(column_name))
+            enum_map.setdefault(enum_key, {})
+            enum_map[enum_key][row[code_col_header]] = row[name_col_header]
+
         self.enumerations = enum_map
 
         # Parse Columns
@@ -234,7 +248,12 @@ class ExcelDataDictionary:
 
         self.table_columns[inputs.table_name].append(column_obj)
         if inputs.has_enumerations:
-            self.enum_flags.add(
+            self.enum_flags.add(
+                (
+                    _normalise(inputs.table_name),
+                    _normalise(inputs.column_name),
+                )
+            )
 
         if errors:
             raise DataDictionaryImportError(
@@ -279,7 +298,7 @@
 
     # Validate Foreign Keys
     def _validate_foreign_keys(self) -> None:
-        name_to_table = {t.name: t for t in self.tables}
+        name_to_table = {_normalise(t.name): t for t in self.tables}
         errors: list[str] = []
         for table in self.tables:
             for column in table:
@@ -292,9 +311,9 @@
                 )
                 continue
             target_table_raw, target_column_raw = target.split(".", 1)
-            target_table_name =
-            target_column_name =
-            referenced_table = name_to_table.get(target_table_name)
+            target_table_name = _strip(target_table_raw)
+            target_column_name = _strip(target_column_raw)
+            referenced_table = name_to_table.get(_normalise(target_table_name))
             if not referenced_table:
                 errors.append(
                     f"{table.name}.{column.name} references unknown table {target_table_name!r}."
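Note: foreign-key targets such as "DEMOGRAPHICS.PATIENT_HASH" are now split, stripped, and resolved through the normalised table index, so case differences no longer fail validation. A sketch with Table objects stubbed as strings and the helper bodies assumed:

```python
def _strip(s: str) -> str:
    return s.strip()  # assumed body

def _normalise(s: str) -> str:
    return s.strip().casefold()  # assumed body

tables = ["Demographics", "Observations"]  # stand-ins for Table instances
name_to_table = {_normalise(t): t for t in tables}

target = "DEMOGRAPHICS.PATIENT_HASH"
target_table_raw, target_column_raw = target.split(".", 1)
referenced = name_to_table.get(_normalise(_strip(target_table_raw)))
assert referenced == "Demographics"  # resolves despite the case difference
```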
@@ -345,7 +364,7 @@
         enumeration_flag_col_header = header_map.get("enumerations")
         primary_key_col_header = header_map.get("primary_key")
         foreign_key_col_header = header_map.get("foreign_key_target")
-        description_col_header = header_map.get("
+        description_col_header = header_map.get("column_description")
         return (
             table_col_header,
             column_col_header,
@@ -392,13 +411,17 @@
                 f"{row_context}: missing required field(s): {', '.join(missing_fields)}."
             )
 
-
-        column_name =
-
+        table_name_raw = _strip(str(row[table_col_header]))
+        column_name = _strip(str(row[column_col_header]))
+
+        resolved_table_name = self._resolve_table_name(table_name_raw)
+        if resolved_table_name is None:
             raise DataDictionaryImportError(
-                f"{row_context}: Table '{
+                f"{row_context}: Table '{table_name_raw}' not present in Tables sheet."
             )
 
+        table_name = resolved_table_name
+
         order_int = _parse_int(row[order_col_header], "Order", row_context)
         length_int = (
             _parse_int(row[length_col_header], "Length", row_context, required=False)
@@ -461,7 +484,7 @@
 
     def _make_column(self, inputs: _ColumnInputs) -> Column:
         enums_for_column = self.enumerations.get(
-            (inputs.table_name, inputs.column_name), {}
+            (_normalise(inputs.table_name), _normalise(inputs.column_name)), {}
         )
         return Column(
             name=inputs.column_name,