thds.tabularasa 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. thds/tabularasa/__init__.py +6 -0
  2. thds/tabularasa/__main__.py +1122 -0
  3. thds/tabularasa/compat.py +33 -0
  4. thds/tabularasa/data_dependencies/__init__.py +0 -0
  5. thds/tabularasa/data_dependencies/adls.py +97 -0
  6. thds/tabularasa/data_dependencies/build.py +573 -0
  7. thds/tabularasa/data_dependencies/sqlite.py +286 -0
  8. thds/tabularasa/data_dependencies/tabular.py +167 -0
  9. thds/tabularasa/data_dependencies/util.py +209 -0
  10. thds/tabularasa/diff/__init__.py +0 -0
  11. thds/tabularasa/diff/data.py +346 -0
  12. thds/tabularasa/diff/schema.py +254 -0
  13. thds/tabularasa/diff/summary.py +249 -0
  14. thds/tabularasa/git_util.py +37 -0
  15. thds/tabularasa/loaders/__init__.py +0 -0
  16. thds/tabularasa/loaders/lazy_adls.py +44 -0
  17. thds/tabularasa/loaders/parquet_util.py +385 -0
  18. thds/tabularasa/loaders/sqlite_util.py +346 -0
  19. thds/tabularasa/loaders/util.py +532 -0
  20. thds/tabularasa/py.typed +0 -0
  21. thds/tabularasa/schema/__init__.py +7 -0
  22. thds/tabularasa/schema/compilation/__init__.py +20 -0
  23. thds/tabularasa/schema/compilation/_format.py +50 -0
  24. thds/tabularasa/schema/compilation/attrs.py +257 -0
  25. thds/tabularasa/schema/compilation/attrs_sqlite.py +278 -0
  26. thds/tabularasa/schema/compilation/io.py +96 -0
  27. thds/tabularasa/schema/compilation/pandas.py +252 -0
  28. thds/tabularasa/schema/compilation/pyarrow.py +93 -0
  29. thds/tabularasa/schema/compilation/sphinx.py +550 -0
  30. thds/tabularasa/schema/compilation/sqlite.py +69 -0
  31. thds/tabularasa/schema/compilation/util.py +117 -0
  32. thds/tabularasa/schema/constraints.py +327 -0
  33. thds/tabularasa/schema/dtypes.py +153 -0
  34. thds/tabularasa/schema/extract_from_parquet.py +132 -0
  35. thds/tabularasa/schema/files.py +215 -0
  36. thds/tabularasa/schema/metaschema.py +1007 -0
  37. thds/tabularasa/schema/util.py +123 -0
  38. thds/tabularasa/schema/validation.py +878 -0
  39. thds/tabularasa/sqlite3_compat.py +41 -0
  40. thds/tabularasa/sqlite_from_parquet.py +34 -0
  41. thds/tabularasa/to_sqlite.py +56 -0
  42. thds_tabularasa-0.13.0.dist-info/METADATA +530 -0
  43. thds_tabularasa-0.13.0.dist-info/RECORD +46 -0
  44. thds_tabularasa-0.13.0.dist-info/WHEEL +5 -0
  45. thds_tabularasa-0.13.0.dist-info/entry_points.txt +2 -0
  46. thds_tabularasa-0.13.0.dist-info/top_level.txt +1 -0
thds/tabularasa/schema/compilation/sphinx.py
@@ -0,0 +1,550 @@
+ import os
+ import re
+ import textwrap
+ import urllib.parse
+ from functools import lru_cache
+ from itertools import chain
+ from operator import itemgetter
+ from pathlib import Path
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union
+ from warnings import warn
+
+ import networkx as nx
+ from pydantic import AnyUrl
+
+ from thds.tabularasa.schema import metaschema
+ from thds.tabularasa.schema.dtypes import DType
+ from thds.tabularasa.schema.files import FileSourceMixin, LocalFileSourceMixin, TabularFileSource
+ from thds.tabularasa.schema.util import snake_to_title
+
+ # Misc
+ METADATA_COLUMNS = ("Name", "Value")
+ TABLE_COLUMNS = ("Name", "Type", "Nullable?", "Description")
+ SOURCE_COLUMNS = (
+     "Source",
+     "Update Frequency",
+     "Last Checked",
+     "Last Changed",
+     "Contributes To",
+     "Authority",
+ )
+ BADGE_EXTENSION = "*.svg"
+ HEADING_CHAR = "#"
+ DERIVATION_TITLE = "Derivation"
+ DEPENDENCIES_TITLE = "Sources"
+ METADATA_FIELDS = FileSourceMixin.__fields__
+ UNICODE_MAPPING = {
+     ">=": "≥",
+     "<=": "≤",
+     ">": "﹥",
+     "<": "﹤",
+ }
+ SEP = "/"
+ HEADING_UNDERLINE_CHARS = '=-^"'
+ MAX_HEADING_LEVEL = len(HEADING_UNDERLINE_CHARS)
+ HEADING_UNDERLINE_RE = re.compile("|".join(f"({re.escape(c)})+" for c in HEADING_UNDERLINE_CHARS))
+ MISSING_BADGE_MSG = (
+     "Curation badges could not be rendered. Make sure that curation_badge_path "
+     "and source_docs_path are both supplied in schema.build_options."
+ )
+
+
+ # Helper Classes/Functions
+
+
+ class DontSplitMe(str):
+     """A string that should not be split by textwrap."""
+
+     pass
+
+
+ def _wrap_table_field(max_width: int, text: Any) -> str:
+     if isinstance(text, (AnyUrl, DontSplitMe)):
+         return text
+     return "\n\n".join(textwrap.wrap(str(text), width=max_width, break_long_words=False))
+
+
+ def split_long_fields(
+     table_data: Iterable[Sequence], max_field_width: int = 80
+ ) -> List[Tuple[str, ...]]:
+     """Splits long row fields into multiple lines."""
+     return [tuple(_wrap_table_field(max_field_width, field) for field in row) for row in table_data]
+
+
+ def join_blocks(blocks: Iterable[str], sep: str) -> str:
+     return sep.join(filter(bool, blocks))
+
+
+ def heading(title: str, level: int) -> str:
+     char = HEADING_UNDERLINE_CHARS[level - 1]
+     return f"{title}\n{char * len(title)}"
+
+
+ def bold(text: str) -> str:
+     return f"**{text}**"
+
+
+ def italic(text: str) -> str:
+     return f"*{text}*"
+
+
+ def crossref_label(label: str) -> str:
+     return f".. _{label}:"
+
+
+ def crossref(label: str) -> str:
+     return f":ref:`{label}`"
+
+
+ def docref(doc_path: str) -> str:
+     # doc_path is the relative path to the referenced document, without the extension
+     # example: "tables/ds_esri_state"
+     return f":doc:`{doc_path}`"
+
+
+ def anonymous_hyperlink(link_text: str, link: str) -> DontSplitMe:
+     return DontSplitMe(f"`{link_text} <{link}>`__")
+
+
+ def escape(text: str) -> str:
+     # stopgap measure; haven't been able to prevent sphinx from rendering e.g. `\>\=` as &gt; in html
+     for i, o in UNICODE_MAPPING.items():
+         text = text.replace(i, o)
+     return text
+
+
+ @lru_cache
+ def __tabulate() -> Optional[Any]:
+     try:
+         from tabulate import tabulate
+
+         return tabulate
+     except ImportError:
+         warn(
+             "tabulate is unavailable; can't render sphinx documentation. "
+             "Specify the 'cli' extra to ensure this dependency is present."
+         )
+         return None
+
+
+ def render_table(
+     header: Tuple[str, ...],
+     rows: Iterable[Sequence],
+     tablefmt: str = "grid",
+     max_field_width: Optional[int] = 80,
+ ) -> str:
+     tabulate = __tabulate()
+     assert tabulate is not None, "can't render tables in rst without `tabulate` dependency"
+     return tabulate(
+         rows if max_field_width is None else split_long_fields(rows, max_field_width),
+         headers=header,
+         tablefmt=tablefmt,
+     )
+
+
+ def render_figure(img_path: Path) -> str:
+     return f".. figure:: {img_path}"
+
+
+ def demote_heading_levels(markup_text: str, table_name: str, levels: int = 1) -> str:
+     """
+     Demotes each heading in the rst document `markup_text` by `levels` levels.
+     For example, if the text is
+
+         FOO
+         ===
+
+     then `levels=2` will result in the text
+
+         FOO
+         ^^^
+
+     Warns if demotion would exceed the max heading level.
+     """
+     exceeded_limit = False
+     output = []
+     lines = markup_text.splitlines()
+     for prior_line, line in zip(chain([None], lines), lines):
+         if (
+             prior_line is not None
+             and len(prior_line) == len(line)
+             and (match := HEADING_UNDERLINE_RE.fullmatch(line))
+         ):
+             level = next(i for i, s in enumerate(match.groups(), 1) if s)
+             new_level = level + levels
+             exceeded_limit = exceeded_limit or new_level > MAX_HEADING_LEVEL
+             new_level = min(new_level, MAX_HEADING_LEVEL)
+             output.append(HEADING_UNDERLINE_CHARS[new_level - 1] * len(line))
+         else:
+             output.append(line)
+
+     if exceeded_limit:
+         warn(f"Demoting heading levels for table {table_name} will exceed max heading level.")
+
+     return "\n".join(output)
+
+
+ # Column Rendering
+ def render_column_type(
+     column_type: Union[
+         DType,
+         metaschema.AnonCustomType,
+         metaschema.CustomType,
+         metaschema.ArrayType,
+         metaschema.MappingType,
+     ],
+ ) -> str:
+     if isinstance(column_type, DType):
+         return column_type.value
+     elif isinstance(column_type, metaschema.CustomType):
+         return crossref(column_type.name)
+     elif isinstance(column_type, metaschema.AnonCustomType):
+         return column_type.type.value
+     elif isinstance(column_type, metaschema.ArrayType):
+         return "list[" + render_column_type(column_type.values) + "]"
+     elif isinstance(column_type, metaschema.MappingType):
+         return (
+             "map["
+             + render_column_type(column_type.keys)
+             + ": "
+             + render_column_type(column_type.values)
+             + "]"
+         )
+     else:
+         return str(column_type)
+
+
+ def render_column_name(column: metaschema.Column, tbl: metaschema.Table) -> str:
+     """Formats the column name specially if it is part of the table's primary key."""
+     name = column.name
+     return bold(name) if tbl.primary_key and name in tbl.primary_key else name
+
+
+ def render_column_table(tbl: metaschema.Table) -> str:
+     return render_table(
+         TABLE_COLUMNS,
+         (
+             (
+                 render_column_name(c, tbl),
+                 render_column_type(c.type),
+                 str(c.nullable),
+                 c.doc.replace("\n", " "),
+             )
+             for c in tbl.columns
+         ),
+     )
+
+
+ # Derivation Rendering
+ def render_derivation_doc(tbl: metaschema.Table) -> str:
+     """Renders derivation docs. Markdown docs should not include a main title."""
+     derivation_docs = tbl.dependencies.docstring if tbl.dependencies else None
+     if derivation_docs:
+         return join_blocks(
+             [
+                 heading(DERIVATION_TITLE, 2),
+                 demote_heading_levels(derivation_docs, tbl.name, 2),
+             ],
+             "\n\n",
+         )
+     else:
+         return ""
+
+
+ # File metadata rendering
+ def format_repo_url(
+     file: LocalFileSourceMixin,
+     repo_root: Path,
+     repo_url: str,
+     name: Optional[str] = None,
+ ) -> DontSplitMe:
+     relative_file_path = str(file.full_path.absolute().relative_to(repo_root.absolute()))
+     file_path_url = urllib.parse.quote(relative_file_path)
+     url = f"{repo_url.rstrip('/')}/{file_path_url}"
+     # names may not be unique, so we use anonymous refs (__) instead of named refs in those cases
+     return DontSplitMe(f"`{name or relative_file_path} <{url}>`{'__' if name else '_'}")
+
+
+ def extract_file_sources(
+     tbl: metaschema.Table,
+     schema: metaschema.Schema,
+ ) -> Dict[str, FileSourceMixin]:
+     """Iterates through a table's dependencies, gathering metadata. Recurs through reference dependencies."""
+
+     def inner(tbl: metaschema.Table, schema: metaschema.Schema):
+         if isinstance(tbl.dependencies, metaschema.RawDataDependencies):
+             for dep_name in tbl.dependencies.local:
+                 local_dep = schema.local_data[dep_name]
+                 yield metaschema.LocalRef(dep_name), local_dep
+             for dep_name in tbl.dependencies.adls:
+                 remote_dep = schema.remote_data[dep_name]
+                 yield metaschema.ADLSRef(dep_name), remote_dep
+             for dep_name in tbl.dependencies.reference:
+                 table_dep = schema.tables[dep_name]
+                 yield from inner(table_dep, schema)
+         elif isinstance(tbl.dependencies, TabularFileSource):
+             yield metaschema.TabularTextFileRef(tbl.name), tbl.dependencies
+
+     return dict(inner(tbl, schema))
+
+
+ def render_file_source(
+     meta: FileSourceMixin, header_title: Optional[str], repo_root: Path, repo_url: Optional[str] = None
+ ) -> str:
+     """Renders a metadata section for either a table or a dependency."""
+     meta_dict = {name: getattr(meta, name) for name in METADATA_FIELDS if getattr(meta, name)}
+     if repo_url and isinstance(meta, LocalFileSourceMixin):
+         meta_dict.update(github_link=format_repo_url(meta, repo_root, repo_url))
+     header = heading(header_title, 3) if header_title else ""
+     if meta_dict:
+         parts = [header, render_table(METADATA_COLUMNS, meta_dict.items())]
+     else:
+         parts = [header, "Not Available"]
+
+     return join_blocks(parts, "\n\n")
+
+
+ def render_dependencies_doc(
+     file_sources: Mapping[str, FileSourceMixin], repo_root: Path, repo_url: Optional[str] = None
+ ) -> str:
+     """Renders dependency docs with a metadata table for each dependency."""
+     if file_sources:
+         sole_source = len(file_sources) == 1
+         file_docs = (
+             render_file_source(
+                 source, None if sole_source else snake_to_title(name, separator=" "), repo_root, repo_url
+             )
+             for name, source in sorted(file_sources.items(), key=itemgetter(0))
+         )
+         return join_blocks(chain([heading(DEPENDENCIES_TITLE, 2)], file_docs), "\n\n")
+     else:
+         return ""
+
+
+ # Table Rendering
+
+
+ def render_table_doc(
+     tbl: metaschema.Table,
+     schema: metaschema.Schema,
+     repo_root: Path,
+     repo_url: Optional[str] = None,
+ ) -> str:
+     """Renders a table's documentation."""
+     file_sources = extract_file_sources(tbl, schema)
+     parts = [
+         crossref_label(tbl.name),
+         heading(tbl.doc_title, 1),
+         tbl.doc,
+         render_column_table(tbl),
+         render_derivation_doc(tbl),
+         render_dependencies_doc(file_sources, repo_root, repo_url),
+     ]
+     return join_blocks(parts, "\n\n")
+
+
+ def render_type_entry(type: metaschema.CustomType) -> str:
+     entries: List[str] = [type.type.name.lower()]
+     entries.extend(map(escape, filter(None, (c.comment_expr() for c in type.constraints))))
+     enum = type.enum
+     if enum is not None:
+         entries.append("one of " + ", ".join(map("``{}``".format, sorted(enum.enum))))
+
+     return "\n".join(map("- {}".format, entries))
+
+
+ def render_constraint_docs(type: metaschema.CustomType) -> str:
+     return type.docstring or ""
+
+
+ def render_type_doc(custom_type: metaschema.CustomType) -> str:
+     """Renders a custom type's documentation."""
+     parts = [
+         crossref_label(custom_type.name),
+         heading(custom_type.class_name, 2),
+         render_constraint_docs(custom_type),
+         render_type_entry(custom_type),
+     ]
+     return join_blocks(parts, "\n\n")
+
+
+ def render_types_doc(schema: metaschema.Schema) -> str:
+     CUSTOM_TYPES = "Custom Types"
+     return join_blocks(
+         chain(
+             [heading(CUSTOM_TYPES, 1)],
+             map(render_type_doc, sorted(schema.types.values(), key=lambda t: t.class_name)),
+         ),
+         "\n\n",
+     )
+
+
+ # Source Data + Curation Report Rendering
+
+
+ def render_curation_status(schema: metaschema.Schema, repo_root: Path) -> str:
+     CURATION_STATUS = "Curation Status"
+     curation_badge_path = Path(p) if (p := schema.build_options.curation_badge_path) else None
+     source_docs_path = Path(p) if (p := schema.build_options.source_docs_path) else None
+
+     if (curation_badge_path is not None) and (source_docs_path is not None):
+         badge_relpath = Path(os.path.relpath(curation_badge_path, source_docs_path.parent))
+         badge_list = [bp.name for bp in (repo_root / curation_badge_path).glob(BADGE_EXTENSION)]
+         curation_badge_block = [render_figure(badge_relpath / b) for b in sorted(badge_list)]
+     else:
+         curation_badge_block = [italic(MISSING_BADGE_MSG)]
+
+     return join_blocks(
+         chain(
+             [heading(CURATION_STATUS, 2)],
+             curation_badge_block,
+         ),
+         "\n\n",
+     )
+
+
+ def render_source_name(
+     fs_name: str,
+     fs_data: FileSourceMixin,
+     repo_root: Path,
+     repo_url: Optional[str] = None,
+ ) -> DontSplitMe:
+     links = []
+     if fs_data.landing_page is not None:
+         links.append(anonymous_hyperlink("homepage", fs_data.landing_page))
+     if fs_data.url is not None:
+         links.append(anonymous_hyperlink("url", fs_data.url))
+     if repo_url and isinstance(fs_data, LocalFileSourceMixin):
+         links.append(format_repo_url(fs_data, repo_root, repo_url, name="github"))
+
+     return DontSplitMe(f"{fs_name} ({' | '.join(links)})" if links else fs_name)
+
+
+ def render_package_table_links(
+     fs_name: str,
+     schema: metaschema.Schema,
+     dep_graph: nx.DiGraph,
+     table_docs_relpath: Optional[Path] = None,
+ ) -> str:
+     """Gets a list of all package tables that the source data contributes to."""
+     descendants = chain.from_iterable(nx.dfs_successors(dep_graph, fs_name).values())
+     pkg_tables = [
+         table if table_docs_relpath is None else docref(table_docs_relpath / table)
+         for table in descendants
+         if not schema.tables[str(table)].transient
+     ]
+     return "; ".join(pkg_tables)
+
+
+ def render_source_info(
+     fs_name: str,
+     fs_data: FileSourceMixin,
+     schema: metaschema.Schema,
+     dep_graph: nx.DiGraph,
+     repo_root: Path,
+     repo_url: Optional[str] = None,
+     table_docs_relpath: Optional[Path] = None,
+ ) -> List[Any]:
+     return [
+         render_source_name(fs_name, fs_data, repo_root, repo_url),
+         fs_data.update_frequency,
+         fs_data.last_checked,
+         fs_data.last_updated,
+         render_package_table_links(fs_name, schema, dep_graph, table_docs_relpath),
+         fs_data.authority,
+     ]
+
+
+ def build_source_metadata(schema: metaschema.Schema, repo_root: Path) -> Dict[str, List[Any]]:
+     OPEN_ACCESS = "Open Access Data Sources"
+     LICENSED = "Licensed Data Sources"
+     INTERNAL = "Internal Data Sources"
+     source_meta: Dict[str, List[Any]] = {OPEN_ACCESS: [], LICENSED: [], INTERNAL: []}
+     sources = set()
+     dep_graph = schema.dependency_dag()
+
+     table_docs_dir = Path(p) if (p := schema.build_options.table_docs_dir) else None
+     source_docs_path = Path(p) if (p := schema.build_options.source_docs_path) else None
+     if (table_docs_dir is not None) and (source_docs_path is not None):
+         table_docs_relpath = Path(os.path.relpath(table_docs_dir, source_docs_path.parent))
+     else:
+         table_docs_relpath = None
+
+     for table in schema.package_tables:
+         file_sources = extract_file_sources(table, schema)
+         for fs_name, fs_data in file_sources.items():
+             if fs_name in sources:
+                 continue
+             sources.add(fs_name)
+             fs_data_fmt = render_source_info(
+                 fs_name,
+                 fs_data,
+                 schema,
+                 dep_graph,
+                 table_docs_relpath=table_docs_relpath,
+                 repo_root=repo_root,
+                 repo_url=schema.build_options.repo_url,
+             )
+             if fs_data.is_open_access:
+                 stype = OPEN_ACCESS
+             elif fs_data.authority == "Trilliant":
+                 stype = INTERNAL
+             else:
+                 stype = LICENSED
+             source_meta[stype].append(fs_data_fmt)
+
+     for list_ in source_meta.values():
+         list_.sort(key=lambda row: row[0].lower())
+
+     return source_meta
+
+
+ def render_source_data_tables(schema: metaschema.Schema, repo_root: Path) -> str:
+     source_metadata = build_source_metadata(schema, repo_root)
+     parts = []
+     for k, source_data in source_metadata.items():
+         parts.append(heading(k, 3))
+         parts.append(render_table(SOURCE_COLUMNS, source_data))
+     return join_blocks(parts, "\n\n")
+
+
+ def render_source_doc(
+     schema: metaschema.Schema,
+     repo_root: Path,
+ ) -> str:
+     if schema.build_options.source_docs_path is None:
+         return ""
+
+     REPORT_TITLE = "Source Data Updates & Curation Report"
+     SOURCE_TITLE = "Source Data Update Status"
+
+     parts = [
+         heading(REPORT_TITLE, 1),
+         render_curation_status(schema, repo_root),
+         heading(SOURCE_TITLE, 2),
+         render_source_data_tables(schema, repo_root),
+     ]
+     return join_blocks(parts, "\n\n")
+
+
+ def render_sphinx_docs(
+     schema: metaschema.Schema,
+     repo_root: Path,
+     repo_url: Optional[str] = None,
+ ) -> Tuple[str, str, Dict[str, str]]:
+     """
+     Returns (types_doc, source_doc, table_docs).
+     """
+     return (
+         render_types_doc(schema),
+         render_source_doc(schema, repo_root),
+         {
+             table.name: render_table_doc(
+                 table,
+                 schema,
+                 repo_root=repo_root,
+                 repo_url=repo_url,
+             )
+             for table in schema.package_tables
+         },
+     )
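For orientation, here is a minimal sketch of how the reStructuredText helpers in sphinx.py compose. It assumes thds-tabularasa 0.13.0 is installed with the 'cli' extra (which provides the tabulate dependency that render_table needs); the sample strings are invented for illustration.

    from thds.tabularasa.schema.compilation.sphinx import (
        demote_heading_levels,
        heading,
        render_table,
    )

    # A level-1 rst heading gets an "=" underline of matching length.
    doc = heading("FOO", 1)
    print(doc)
    # FOO
    # ===

    # Demoting by two levels moves "=" (level 1) to "^" (level 3); a warning
    # is emitted if the four available underline characters would be exceeded.
    print(demote_heading_levels(doc, table_name="example", levels=2))
    # FOO
    # ^^^

    # render_table wraps cells longer than 80 characters and delegates to
    # tabulate's "grid" format.
    print(render_table(("Name", "Value"), [("version", "0.13.0")]))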
thds/tabularasa/schema/compilation/sqlite.py
@@ -0,0 +1,69 @@
+ from typing import Optional, Tuple
+
+ from .. import metaschema
+ from .util import AUTOGEN_DISCLAIMER
+
+ SQL_TABLE_SCHEMA_TEMPLATE = """CREATE TABLE {name}(
+     {columns}
+ );"""
+
+
+ def index_name(table_name: str, *index_columns: str) -> str:
+     return f"idx_{table_name}_{'_'.join(index_columns)}".lower()
+
+
+ def render_sql_table_schema(table: metaschema.Table) -> str:
+     column_defs = []
+     for column in table.columns:
+         column_def = f"{column.snake_case_name} {column.dtype.sqlite}"
+         if not column.nullable:
+             column_def = f"{column_def} NOT NULL"
+         column_defs.append(column_def)
+
+     return SQL_TABLE_SCHEMA_TEMPLATE.format(
+         name=table.snake_case_name, columns=",\n    ".join(column_defs)
+     )
+
+
+ def render_sql_index_schema(table: metaschema.Table) -> Optional[str]:
+     unique_constraints = {frozenset(c.unique) for c in table.unique_constraints}
+     index_defs = []
+     if table.primary_key:
+         table_constraints = (
+             f"CREATE UNIQUE INDEX {index_name(table.snake_case_name, *table.primary_key)} ON "
+             f'{table.snake_case_name}({", ".join(table.primary_key)});'
+         )
+         index_defs.append(table_constraints)
+
+     for index in table.indexes:
+         unique = "UNIQUE " if frozenset(index) in unique_constraints else ""
+         index_def = (
+             f"CREATE {unique}INDEX {index_name(table.snake_case_name, *index)} "
+             f'ON {table.snake_case_name}({", ".join(index)});'
+         )
+         index_defs.append(index_def)
+
+     return "\n\n".join(index_defs) if index_defs else None
+
+
+ def render_sql_schema(schema: metaschema.Schema) -> Tuple[str, str]:
+     """Render SQL CREATE TABLE and CREATE INDEX DDL.
+
+     :param schema: input metaschema definition to generate SQL DDL from
+
+     :return: a two-tuple where the first item is the CREATE TABLE DDL and the second is the
+         CREATE INDEX DDL
+     :rtype: Tuple[str, str]
+     """
+     defs = []
+     index_defs = []
+     for table in schema.package_tables:
+         defs.append(render_sql_table_schema(table))
+         index_defs.append(render_sql_index_schema(table))
+
+     create_table_ddl = f"-- {AUTOGEN_DISCLAIMER}\n\n" + "\n\n".join(defs).strip() + "\n"
+     create_index_ddl = (
+         f"-- {AUTOGEN_DISCLAIMER}\n\n" + "\n\n".join(filter(None, index_defs)).strip() + "\n"
+     )
+
+     return create_table_ddl, create_index_ddl
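As a rough illustration of the DDL this module emits: index names come from index_name, and the CREATE TABLE / CREATE INDEX statements follow the templates above. The table and column names below are invented, and actual column types depend on each column's dtype.sqlite mapping, so the commented DDL is only indicative.

    from thds.tabularasa.schema.compilation.sqlite import index_name

    # Index names are the table name plus the indexed columns, lowercased.
    print(index_name("county", "state_fips", "county_fips"))
    # idx_county_state_fips_county_fips

    # For a hypothetical table `county` with primary key (state_fips, county_fips)
    # and a secondary index on `name`, render_sql_table_schema and
    # render_sql_index_schema would produce DDL shaped like:
    #
    #   CREATE TABLE county(
    #       state_fips TEXT NOT NULL,
    #       county_fips TEXT NOT NULL,
    #       name TEXT NOT NULL
    #   );
    #
    #   CREATE UNIQUE INDEX idx_county_state_fips_county_fips
    #       ON county(state_fips, county_fips);
    #
    #   CREATE INDEX idx_county_name ON county(name);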