thds.tabularasa 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. thds/tabularasa/__init__.py +6 -0
  2. thds/tabularasa/__main__.py +1122 -0
  3. thds/tabularasa/compat.py +33 -0
  4. thds/tabularasa/data_dependencies/__init__.py +0 -0
  5. thds/tabularasa/data_dependencies/adls.py +97 -0
  6. thds/tabularasa/data_dependencies/build.py +573 -0
  7. thds/tabularasa/data_dependencies/sqlite.py +286 -0
  8. thds/tabularasa/data_dependencies/tabular.py +167 -0
  9. thds/tabularasa/data_dependencies/util.py +209 -0
  10. thds/tabularasa/diff/__init__.py +0 -0
  11. thds/tabularasa/diff/data.py +346 -0
  12. thds/tabularasa/diff/schema.py +254 -0
  13. thds/tabularasa/diff/summary.py +249 -0
  14. thds/tabularasa/git_util.py +37 -0
  15. thds/tabularasa/loaders/__init__.py +0 -0
  16. thds/tabularasa/loaders/lazy_adls.py +44 -0
  17. thds/tabularasa/loaders/parquet_util.py +385 -0
  18. thds/tabularasa/loaders/sqlite_util.py +346 -0
  19. thds/tabularasa/loaders/util.py +532 -0
  20. thds/tabularasa/py.typed +0 -0
  21. thds/tabularasa/schema/__init__.py +7 -0
  22. thds/tabularasa/schema/compilation/__init__.py +20 -0
  23. thds/tabularasa/schema/compilation/_format.py +50 -0
  24. thds/tabularasa/schema/compilation/attrs.py +257 -0
  25. thds/tabularasa/schema/compilation/attrs_sqlite.py +278 -0
  26. thds/tabularasa/schema/compilation/io.py +96 -0
  27. thds/tabularasa/schema/compilation/pandas.py +252 -0
  28. thds/tabularasa/schema/compilation/pyarrow.py +93 -0
  29. thds/tabularasa/schema/compilation/sphinx.py +550 -0
  30. thds/tabularasa/schema/compilation/sqlite.py +69 -0
  31. thds/tabularasa/schema/compilation/util.py +117 -0
  32. thds/tabularasa/schema/constraints.py +327 -0
  33. thds/tabularasa/schema/dtypes.py +153 -0
  34. thds/tabularasa/schema/extract_from_parquet.py +132 -0
  35. thds/tabularasa/schema/files.py +215 -0
  36. thds/tabularasa/schema/metaschema.py +1007 -0
  37. thds/tabularasa/schema/util.py +123 -0
  38. thds/tabularasa/schema/validation.py +878 -0
  39. thds/tabularasa/sqlite3_compat.py +41 -0
  40. thds/tabularasa/sqlite_from_parquet.py +34 -0
  41. thds/tabularasa/to_sqlite.py +56 -0
  42. thds_tabularasa-0.13.0.dist-info/METADATA +530 -0
  43. thds_tabularasa-0.13.0.dist-info/RECORD +46 -0
  44. thds_tabularasa-0.13.0.dist-info/WHEEL +5 -0
  45. thds_tabularasa-0.13.0.dist-info/entry_points.txt +2 -0
  46. thds_tabularasa-0.13.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,117 @@
1
+ import itertools
2
+ import re
3
+ import textwrap
4
+ from inspect import Signature, signature
5
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, Union
6
+
7
+ import thds.tabularasa.schema.metaschema as metaschema
8
+
9
+ AUTOGEN_DISCLAIMER = "This code is auto-generated; do not edit!"
10
+
11
+
12
def sorted_class_names_for_import(names: Iterable[str]) -> List[str]:
    """Deduplicate ``names`` and order them for an import statement.

    All-uppercase names come first in plain sorted order; every other name
    follows, sorted case-insensitively.
    """
    unique = set(names)
    constants = [candidate for candidate in unique if candidate.isupper()]
    others = unique.difference(constants)
    ordered = sorted(constants)
    ordered.extend(sorted(others, key=str.lower))
    return ordered
17
+
18
+
19
def _list_literal(exprs: Iterable[str], linebreak: bool = True) -> str:
    """Join source-code expressions into the text of a list literal.

    An empty iterable yields ``"[]"``. With ``linebreak`` each expression is
    placed on its own line and followed by a trailing comma; otherwise the
    expressions are joined on a single line.
    """
    items = list(exprs)
    if not items:
        return "[]"

    # NOTE(review): the copy of this file under review had whitespace runs
    # collapsed; confirm the indent width inside these literals against the
    # packaged source.
    if linebreak:
        sep, start, end = ",\n ", "\n ", ",\n"
    else:
        sep, start, end = ", ", "", ""
    return f"[{start}{sep.join(items)}{end}]"
31
+
32
+
33
def _dict_literal(named_exprs: Iterable[Tuple[str, str]], linebreak: bool = True) -> str:
    """Render ``(name, expression)`` pairs as the source text of a ``dict(...)`` call.

    Args:
        named_exprs: pairs of keyword name and source-code expression.
        linebreak: when true, put each ``name=expr`` entry on its own line
            with a trailing comma; otherwise join them on one line.

    Returns:
        ``"{}"`` when there are no pairs, else ``"dict(name=expr, ...)"``.
    """
    # NOTE(review): the copy of this file under review had whitespace runs
    # collapsed; confirm the indent width inside these literals against the
    # packaged source.
    sep = ",\n " if linebreak else ", "
    start = "\n " if linebreak else ""
    end = ",\n" if linebreak else ""

    entries = [f"{name}={expr}" for name, expr in named_exprs]
    if not entries:
        return "{}"
    return f"dict({start}{sep.join(entries)}{end})"
46
+
47
+
48
def _indent(expr: str, level: int = 1, first_line: bool = False) -> str:
    """Indent every line of ``expr`` by ``level`` repetitions of the indent unit.

    Unless ``first_line`` is true, leading whitespace is stripped from the
    result so the first line can be spliced after existing text.
    """
    # NOTE(review): the copy of this file under review had whitespace runs
    # collapsed; confirm the width of this indent unit against the packaged source.
    prefix = " " * level
    shifted = textwrap.indent(expr, prefix)
    if first_line:
        return shifted
    return shifted.lstrip()
52
+
53
+
54
def _wrap_lines_with_prefix(
    text: str,
    line_width: int,
    first_line_prefix_len: int,
    trailing_line_indent: int = 0,
) -> str:
    """Wrap ``text`` where the first line has less room than the rest.

    Whitespace runs in ``text`` are collapsed to single spaces first. The
    first line is limited to ``line_width - first_line_prefix_len`` characters;
    the remaining text is wrapped at ``line_width`` and each of those lines is
    then prefixed with ``trailing_line_indent`` spaces.
    """
    normalized = re.sub(r"\s+", " ", text).strip()
    # an empty placeholder makes `shorten` return only the whole words that fit
    head = textwrap.shorten(normalized, line_width - first_line_prefix_len, placeholder="")
    remainder = normalized[len(head) :].lstrip()
    if not remainder:
        return head

    wrapped = textwrap.wrap(remainder, line_width)
    if trailing_line_indent:
        pad = " " * trailing_line_indent
        wrapped = [pad + piece for piece in wrapped]
    return "\n".join([head] + wrapped)
72
+
73
+
74
def constructor_template(
    type_: Union[Type, Callable],
    module_name: Optional[str] = None,
    sig: Optional[Signature] = None,
    exclude: Optional[List[str]] = None,
    type_params: Optional[List[str]] = None,
) -> str:
    """Build a ``str.format`` template for a keyword-only call to ``type_``.

    The result looks like ``module.Name[T](a={a}, b={b},)`` spread over
    several lines, with one ``{param}`` placeholder per parameter.

    Args:
        type_: class or callable whose call is being templated.
        module_name: module qualifier to emit; defaults to ``type_.__module__``.
        sig: signature to take parameter names from. When omitted, the
            signature is introspected; for a class, ``__init__`` is used and
            its first parameter is dropped.
        exclude: parameter names to leave out of the template.
        type_params: optional names rendered as a ``[...]`` subscript after
            the callable's name.
    """
    module = module_name or type_.__module__
    name = type_.__name__

    if sig is not None:
        params = list(sig.parameters)
    elif isinstance(type_, type):
        # skip the first (instance) parameter of __init__
        params = list(signature(type_.__init__).parameters)[1:]  # type: ignore
    else:
        params = list(signature(type_).parameters)

    excluded = exclude or []
    # NOTE(review): the copy of this file under review had whitespace runs
    # collapsed; confirm the indent width inside these literals against the
    # packaged source.
    args = ",\n ".join(f"{param}={{{param}}}" for param in params if param not in excluded)
    subscript = f"[{', '.join(type_params)}]" if type_params else ""
    return f"{module}.{name}{subscript}(\n {args},\n)"
98
+
99
+
100
def render_constructor(template: str, kwargs: Dict[str, Any], var_name: Optional[str] = None) -> str:
    """Fill a constructor template with the ``repr`` of each keyword value.

    When ``var_name`` is given, the rendered call is returned as an assignment
    to that name; otherwise the bare call expression is returned.
    """
    call = template.format(**{key: repr(val) for key, val in kwargs.items()})
    if var_name is None:
        return call
    return f"{var_name} = {call}"
104
+
105
+
106
class VarName(str):
    """A string whose ``repr`` is its own text, without quotes.

    Code that renders values through ``repr`` will therefore emit a
    ``VarName`` as a bare identifier rather than as a string literal.
    """

    def __repr__(self) -> str:
        return self
109
+
110
+
111
# Format template for a `metaschema.RemoteBlobStoreSpec(...)` call. The
# signature is taken from the class itself and passed in explicitly, so
# `constructor_template` uses its parameters as-is.
BLOB_STORE_SPEC_TEMPLATE = constructor_template(
    type_=metaschema.RemoteBlobStoreSpec,
    sig=signature(metaschema.RemoteBlobStoreSpec),
)
114
+
115
+
116
def render_blob_store_def(blob_store: metaschema.RemoteBlobStoreSpec, var_name: str) -> str:
    """Render source code assigning a reconstruction of ``blob_store`` to ``var_name``."""
    field_values = blob_store.dict()
    return render_constructor(BLOB_STORE_SPEC_TEMPLATE, field_values, var_name)
@@ -0,0 +1,327 @@
1
+ import re
2
+ from enum import Enum
3
+ from typing import Dict, List, Optional, Pattern, Tuple, Union
4
+
5
+ import pandas as pd
6
+ import pandera as pa
7
+ from pydantic import BaseModel, Extra, StrictFloat, StrictInt
8
+
9
+ from .dtypes import DType
10
+ from .util import EnumList
11
+
12
+ Numeric = Union[StrictInt, StrictFloat]
13
+
14
+
15
class ColumnConstraint(BaseModel, extra=Extra.forbid):
    # Base model for a constraint on a single column. Subclasses declare the
    # dtypes they support in `__dtypes__` and override the check builders below.
    # NOTE(review): documented with comments rather than a class docstring so
    # the model's own metadata is left untouched - confirm whether a docstring
    # would be acceptable here.
    __dtypes__: Tuple[DType, ...] = ()

    def applies_to(self, dtype: DType) -> bool:
        """Return whether this constraint may be attached to a column of ``dtype``."""
        return dtype in self.__dtypes__

    def pandera_check_expr(self) -> str:
        """Return source code for the pandera check (with pandera aliased to ``pa``).

        Raises:
            NotImplementedError: always, in this base class.
        """
        # The message names `pandera_check_expr` (this method); it previously
        # told implementers to write `pandera_check`, which is the other method.
        raise NotImplementedError(
            f"{type(self).__name__} must implement pandera_check_expr producing an expression "
            "equivalent to the check produced by the `.pandera_check` method "
            "(with pandera aliased to 'pa')"
        )

    def pandera_check(self) -> pa.Check:
        """Return the pandera check object for this constraint.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement pandera_check producing a pandera.Check object "
            "equivalent to the expression produced by the `.pandera_check_expr` method"
        )

    def sqlite_check_expr(self, colname: str) -> str:
        """Return a SQLite CHECK expression over column ``colname``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(f"sqlite check constraint not implemented for constraint {self}")

    def required_modules(self) -> List[str]:
        """list of stdlib modules required for constraint checks"""
        return []

    def comment_expr(self) -> Optional[str]:
        """Return a short human-readable description of the constraint, if any."""
        return None
43
+
44
+
45
class StrConstraint(ColumnConstraint):
    # Base for constraints that are only valid on string columns.
    __dtypes__ = (DType.STR,)
47
+
48
+
49
class LenConstraint(StrConstraint):
    # Shared behaviour for string-length bounds. Concrete subclasses set
    # `__operator__` (the comparison symbol) and `__value_attr__` (the name of
    # the field holding the bound).
    __operator__: str
    __value_attr__: str

    def pandera_check_expr(self) -> str:
        """Return source code for the equivalent ``pa.Check.str_length(...)`` call."""
        rendered = ", ".join(
            f"{key}={val!r}" for key, val in self._pandera_check_kwargs().items()
        )
        return f"pa.{pa.Check.__name__}.{pa.Check.str_length.__name__}({rendered})"

    def pandera_check(self) -> pa.Check:
        """Return the ``pa.Check.str_length`` check for this bound."""
        kwargs = self._pandera_check_kwargs()
        return pa.Check.str_length(**kwargs)

    def _pandera_check_kwargs(self) -> Dict[str, int]:
        """Translate the operator and bound into a ``min_value``/``max_value`` keyword."""
        operator = self.__operator__
        bound = getattr(self, self.__value_attr__)
        is_upper = "<" in operator
        if "=" not in operator:
            # strict comparison: shift the bound by one to exclude it
            bound = bound - 1 if is_upper else bound + 1
        return {"max_value" if is_upper else "min_value": bound}

    def sqlite_check_expr(self, colname: str) -> str:
        """Return a SQLite expression comparing ``length(colname)`` to the bound."""
        bound = getattr(self, self.__value_attr__)
        return f"length({colname}) {self.__operator__} {bound!r}"

    def comment_expr(self) -> Optional[str]:
        """Return a short description such as ``length <= 5``."""
        bound = getattr(self, self.__value_attr__)
        return f"length {self.__operator__} {bound!r}"
76
+
77
+
78
class NumConstraint(ColumnConstraint):
    # Base for constraints valid on every integer and float dtype.
    __dtypes__ = tuple(filter(lambda t: t.is_float_type or t.is_int_type, DType))
80
+
81
+
82
class OrderConstraint(NumConstraint):
    # Shared behaviour for numeric ordering bounds. Concrete subclasses set
    # `__operator__` (the comparison symbol) and `__value_attr__` (the name of
    # the field holding the bound, which is also used as the `pa.Check` method name).
    __operator__: str
    __value_attr__: str

    def applies_to(self, dtype: DType) -> bool:
        """Require the bound's Python type to match the column's numeric kind."""
        bound = getattr(self, self.__value_attr__)
        if dtype.is_int_type:
            return isinstance(bound, int)
        if dtype.is_float_type:
            return isinstance(bound, float)
        return False

    def pandera_check_expr(self) -> str:
        """Return source code for the equivalent ``pa.Check.<attr>(bound)`` call."""
        attr = self.__value_attr__
        return f"pa.{pa.Check.__name__}.{attr}({getattr(self, attr)!r})"

    def pandera_check(self) -> pa.Check:
        """Return the pandera check built by the ``pa.Check`` method named by the bound field."""
        attr = self.__value_attr__
        factory = getattr(pa.Check, attr)
        return factory(getattr(self, attr))

    def sqlite_check_expr(self, colname) -> str:
        """Return a SQLite expression comparing ``colname`` to the bound."""
        bound = getattr(self, self.__value_attr__)
        return f"{colname} {self.__operator__} {bound!r}"

    def comment_expr(self) -> Optional[str]:
        """Return a short description such as ``<= 5``."""
        bound = getattr(self, self.__value_attr__)
        return f"{self.__operator__} {bound!r}"
105
+
106
+
107
class LessThanOrEqual(OrderConstraint):
    # value <= le
    __operator__ = "<="
    __value_attr__ = "le"
    le: Numeric


class GreaterThanOrEqual(OrderConstraint):
    # value >= ge
    __operator__ = ">="
    __value_attr__ = "ge"
    ge: Numeric


class LessThan(OrderConstraint):
    # value < lt
    __operator__ = "<"
    __value_attr__ = "lt"
    lt: Numeric


class GreaterThan(OrderConstraint):
    # value > gt
    __operator__ = ">"
    __value_attr__ = "gt"
    gt: Numeric
129
+
130
+
131
class EqualModulo(NumConstraint):
    # Requires `value % mod == eq`.
    eq: Numeric
    mod: Numeric

    def sqlite_check_expr(self, colname: str) -> str:
        """Return the SQLite expression ``colname % mod = eq``."""
        return f"{colname} % {self.mod} = {self.eq}"

    def pandera_check_expr(self) -> str:
        """Return source code for a lambda-based ``pa.Check`` named after this model's repr."""
        label = repr(self)
        predicate = f"lambda s: (s % {self.mod} == {self.eq})"
        return f"pa.{pa.Check.__name__}({predicate}, name={label!r})"

    def pandera_check(self) -> pa.Check:
        """Return a ``pa.Check`` testing the remainder of the checked values."""
        label = repr(self)
        return pa.Check(lambda s: (s % self.mod == self.eq), name=label)

    def comment_expr(self) -> str:
        """Return a short description such as ``equals 1 modulo 2``."""
        return f"equals {self.eq!r} modulo {self.mod!r}"
146
+
147
+
148
class LenLessThanOrEqual(LenConstraint):
    # length <= len_le
    __operator__ = "<="
    __value_attr__ = "len_le"
    len_le: StrictInt


class LenGreaterThanOrEqual(LenConstraint):
    # length >= len_ge
    __operator__ = ">="
    __value_attr__ = "len_ge"
    len_ge: StrictInt


class LenLessThan(LenConstraint):
    # length < len_lt
    __operator__ = "<"
    __value_attr__ = "len_lt"
    len_lt: StrictInt


class LenGreaterThan(LenConstraint):
    # length > len_gt
    __operator__ = ">"
    __value_attr__ = "len_gt"
    len_gt: StrictInt
170
+
171
+
172
# Letter cases accepted by `CaseConstraint`; each value is combined with an
# "is" prefix to name a pandas string method and is also used as a SQLite
# function name.
class StrCase(Enum):
    lower = "lower"
    upper = "upper"


# Character classes accepted by `CharsConstraint`; each value is combined with
# an "is" prefix to name a pandas string method (note `alphanumeric` maps to
# "alnum").
class StrChars(Enum):
    alpha = "alpha"
    alphanumeric = "alnum"
    digit = "digit"
    decimal = "decimal"
    title = "title"
183
+
184
+
185
class CaseConstraint(StrConstraint):
    # Checks string values with pandas `str.islower` / `str.isupper`, chosen by
    # `case`; null results are treated as passing.
    case: StrCase

    def pandera_check_expr(self) -> str:
        """Return source code for the equivalent lambda-based ``pa.Check``."""
        method = self._pandas_str_method_name()
        label = self._check_name()
        return f"pa.{pa.Check.__name__}(lambda s: s.str.{method}().fillna(True), name={label!r})"

    def pandera_check(self) -> pa.Check:
        """Return a ``pa.Check`` calling the selected pandas string predicate."""
        method = self._pandas_str_method_name()
        return pa.Check(
            lambda s: getattr(s.str, method)().fillna(True),
            name=self._check_name(),
        )

    def _check_name(self) -> str:
        """Name given to the pandera check, e.g. ``case=lower``."""
        return f"case={self.case.value}"

    def _pandas_str_method_name(self) -> str:
        """Name of the pandas string method to call, e.g. ``islower``."""
        return f"is{self.case.value}"

    def sqlite_check_expr(self, colname: str) -> str:
        """Return a SQLite expression comparing the column to its case-converted self."""
        converter = self.case.value
        return f"{colname} = {converter}({colname})"

    def comment_expr(self) -> Optional[str]:
        """Return ``lowercase`` or ``uppercase``."""
        return f"{self.case.value}case"
211
+
212
+
213
class CharsConstraint(StrConstraint):
    # Checks string values with the pandas `str.is<chars>` predicate selected
    # by `chars`; null results are treated as passing.
    chars: StrChars

    def pandera_check_expr(self) -> str:
        """Return source code for the equivalent lambda-based ``pa.Check``."""
        method = self._pandas_str_method_name()
        label = self._check_name()
        return f"pa.{pa.Check.__name__}(lambda s: s.str.{method}().fillna(True), name={label!r})"

    def pandera_check(self) -> pa.Check:
        """Return a ``pa.Check`` calling the selected pandas string predicate."""
        method = self._pandas_str_method_name()
        return pa.Check(
            lambda s: getattr(s.str, method)().fillna(True),
            name=self._check_name(),
        )

    def _check_name(self) -> str:
        """Name given to the pandera check, e.g. ``chars=alnum``."""
        return f"chars={self.chars.value}"

    def _pandas_str_method_name(self) -> str:
        """Name of the pandas string method to call, e.g. ``isalnum``."""
        return f"is{self.chars.value}"

    def comment_expr(self) -> Optional[str]:
        """Return a short description built from the enum member's name."""
        return f"{self.chars.name} pattern"
236
+
237
+
238
class MatchesRegex(StrConstraint):
    # Requires string values to match a regular expression, either in full
    # (`fullmatch=True`) or from the start of the string; nulls pass (`na=True`).
    matches: Pattern
    fullmatch: bool = True
    case_sensitive: bool = True

    # The checks hand pandas the pattern *text* rather than a compiled pattern.
    # `re.compile` raises ValueError when given an already-compiled pattern
    # together with flags, and pandas has to add a flag to honour `case=False`,
    # so the previous compiled-pattern form could not support
    # `case_sensitive=False`. The old code already rebuilt the pattern from
    # `.pattern`, so no flags are lost by this change.

    def pandera_check_expr(self) -> str:
        """Return source code for the equivalent lambda-based ``pa.Check``."""
        method_name = self._pandas_str_method_name()
        check_name = self._check_name()
        return (
            f"pa.{pa.Check.__name__}(lambda s: s.str.{method_name}"
            f"({self.matches.pattern!r}, "
            f"case={self.case_sensitive}, na=True), name={check_name!r})"
        )

    def pandera_check(self) -> pa.Check:
        """Return a ``pa.Check`` applying ``str.fullmatch`` or ``str.match``."""
        method_name = self._pandas_str_method_name()
        pattern = self.matches.pattern
        case_sensitive = self.case_sensitive

        def check_fn(s: pd.Series):
            return getattr(s.str, method_name)(pattern, case=case_sensitive, na=True)

        return pa.Check(check_fn, name=self._check_name())

    def _check_name(self) -> str:
        """Name given to the pandera check; records both matching options."""
        return f"{type(self).__name__}(fullmatch={self.fullmatch}, case_sensitive={self.case_sensitive})"

    def _pandas_str_method_name(self) -> str:
        """Name of the pandas string method to call."""
        return "fullmatch" if self.fullmatch else "match"

    def required_modules(self) -> List[str]:
        """Stdlib modules to import alongside the generated check expression."""
        # Still reported for backward compatibility, although the generated
        # expression no longer calls `re.compile` itself.
        return ["re"]

    def comment_expr(self) -> Optional[str]:
        """Return a short description of the pattern and matching options."""
        extras = [
            "full match" if self.fullmatch else "prefix match",
            "case sensitive" if self.case_sensitive else "case insensitive",
        ]
        extra = f" ({', '.join(extras)})"
        return f"matches ``{self.matches.pattern}``{extra}"
287
+
288
+
289
class EnumConstraint(ColumnConstraint):
    # Restricts a column to a fixed list of allowed values.
    __dtypes__ = (DType.STR, *(t for t in DType if t.is_int_type or t.is_float_type))

    enum: EnumList
    ordered: bool = False

    def pandera_check_expr(self) -> str:
        """Return source code for the equivalent ``pa.Check.isin(...)`` call."""
        # pandera doesn't support checking specific categoricals natively, only
        # that a column has a categorical dtype, so we check the values here
        return f"pa.{pa.Check.__name__}.isin({self.enum!r})"

    def pandera_check(self) -> pa.Check:
        """Return a ``pa.Check`` requiring membership in ``enum``."""
        return pa.Check.isin(self.enum)

    def sqlite_check_expr(self, colname: str) -> str:
        """Return a SQLite ``colname IN (...)`` expression over the allowed values."""
        return f'{colname} IN ({", ".join(map(self._sqlite_literal, self.enum))})'

    @staticmethod
    def _sqlite_literal(value) -> str:
        """Render one allowed value as a SQL literal.

        Strings are single-quoted with embedded single quotes doubled. Python's
        ``repr`` is not safe for this: it switches to double quotes when the
        text contains a single quote and adds backslash escapes, neither of
        which is SQL string-literal syntax. Other values keep their ``repr``.
        """
        if isinstance(value, str):
            return "'" + value.replace("'", "''") + "'"
        return repr(value)

    def applies_to(self, dtype: DType) -> bool:
        """Return whether the first allowed value's type fits ``dtype``.

        An empty ``enum`` applies to every dtype.
        """
        if not self.enum:
            return True
        value_type = type(self.enum[0])
        return issubclass(value_type, dtype.python)
311
+
312
+
313
# Union of every concrete constraint model defined in this module.
# NOTE(review): member order is kept exactly as-is; it may matter to whatever
# resolves this union during validation - confirm before reordering.
AnyColumnConstraint = Union[
    # numeric ordering
    LessThanOrEqual,
    GreaterThanOrEqual,
    LessThan,
    GreaterThan,
    EqualModulo,
    # string length
    LenLessThanOrEqual,
    LenGreaterThanOrEqual,
    LenLessThan,
    LenGreaterThan,
    # allowed values
    EnumConstraint,
    # string content
    CaseConstraint,
    CharsConstraint,
    MatchesRegex,
]
@@ -0,0 +1,153 @@
1
+ import builtins
2
+ import datetime
3
+ from enum import Enum
4
+ from typing import Any, Callable, Iterator, Optional, Set, Type, Union
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import pyarrow
9
+ from pandas.core.dtypes import base as pd_dtypes
10
+
11
+ from thds.tabularasa.compat import PANDAS_VERSION_LT_2_0
12
+
13
+ from .util import EnumList, Identifier
14
+
15
# Either kind of dtype object that pandas columns can carry.
AnyDtype = Union[pd_dtypes.ExtensionDtype, np.dtype]
# Python scalar types that column values are mapped to.
PyType = Union[int, float, bool, str, datetime.date, datetime.datetime]


# pandas extension dtype instances keyed by dtype name.
_dtype_name_to_pd_dtype = {
    "int8": pd.Int8Dtype(),
    "int16": pd.Int16Dtype(),
    "int32": pd.Int32Dtype(),
    "int64": pd.Int64Dtype(),
    "uint8": pd.UInt8Dtype(),
    "uint16": pd.UInt16Dtype(),
    "uint32": pd.UInt32Dtype(),
    "uint64": pd.UInt64Dtype(),
    "bool": pd.BooleanDtype(),
    "str": pd.StringDtype(),
}
31
+
32
+
33
# Primitive column types, with conversions to their pandas, SQLite, Python and
# parquet (pyarrow) counterparts.
class DType(Enum):
    INT8 = "int8"
    INT16 = "int16"
    INT32 = "int32"
    INT64 = "int64"
    UINT8 = "uint8"
    UINT16 = "uint16"
    UINT32 = "uint32"
    UINT64 = "uint64"
    FLOAT32 = "float32"
    FLOAT64 = "float64"
    STR = "str"
    DATE = "date"
    DATETIME = "datetime"
    BOOL = "bool"

    @property
    def enum(self) -> None:
        """Always ``None`` for a primitive dtype."""
        return None

    @property
    def is_int_type(self):
        """True for the signed and unsigned integer members."""
        return self.value.startswith(("int", "uint"))

    @property
    def is_float_type(self):
        """True for the float members."""
        return self.value.startswith("float")

    def pandas(
        self,
        nullable: bool = False,
        index: bool = False,
        enum: Optional[EnumList] = None,
        ordered: bool = False,
    ) -> Union[np.dtype, pd_dtypes.ExtensionDtype]:
        """Return the pandas/numpy dtype used to hold this type.

        Args:
            nullable: use nullable extension dtypes for bool and integer types.
            index: the dtype is for an index; on pandas<2.0 numeric types are
                then widened to the default-width numpy type.
            enum: when non-empty, a categorical dtype over these values is
                returned regardless of the member.
            ordered: whether that categorical dtype is ordered.
        """
        if enum:
            return pd.CategoricalDtype(enum, ordered=ordered)
        if self in (DType.DATE, DType.DATETIME):
            # pandas <2.0 *only* supports nanosecond datetimes so we're safe to use this
            return np.dtype("datetime64[ns]")
        if self is DType.BOOL:
            return pd.BooleanDtype() if nullable else np.dtype("bool")
        if self is DType.STR:
            # StringDtype is better than dtype('O') for explicitness and null handling,
            # but note that pandas<1.4 coerces to dtype('O') for indexes - we used to handle that case
            # but it is no longer necessary in pandas>=1.4
            return pd.StringDtype()

        # only int and float types remain
        if nullable and self.is_int_type:
            # nullable int extension types
            return _dtype_name_to_pd_dtype[self.value]
        if index and PANDAS_VERSION_LT_2_0:
            # no low-resolution types on indexes with pandas<2.0
            if self.is_float_type:
                return np.dtype("float")
            return np.dtype("int" if self.value.startswith("int") else "uint")
        # non-nullable ints or floats
        return np.dtype(self.value)

    @property
    def sqlite(self) -> str:
        """Column type name used in SQLite DDL."""
        # only one true float and int type in sqlite, but all aliases accepted; we keep them as-is
        # for explicitness
        renamed = {DType.BOOL: "BOOLEAN", DType.STR: "TEXT"}
        return renamed.get(self, self.name)

    @property
    def python(self) -> Type[PyType]:
        """Python type corresponding to this dtype.

        Raises:
            TypeError: if no Python type is registered for the member.
        """
        if self.is_int_type:
            return int
        if self.is_float_type:
            return float
        registered = {
            DType.STR: str,
            DType.DATE: datetime.date,
            DType.DATETIME: datetime.datetime,
            DType.BOOL: bool,
        }
        py_type = registered.get(self)
        if py_type is None:
            raise TypeError(f"No python type registered for {self}")
        return py_type

    def python_type_literal(self, build_options: Any = None, builtin: bool = False) -> str:
        """Return source text naming the Python type, module-qualified unless it is a builtin.

        Neither argument is used by this implementation.
        """
        py_type = self.python
        if py_type.__module__ != builtins.__name__:
            return f"{py_type.__module__}.{py_type.__name__}"
        # int, str, bool, etc
        return py_type.__name__

    @property
    def custom_type_refs(self) -> Iterator[Identifier]:
        """Yield nothing: a primitive dtype refers to no custom types."""
        yield from ()

    @property
    def parquet(self) -> pyarrow.DataType:
        """pyarrow data type used when storing this dtype."""
        if self is DType.STR:
            return pyarrow.string()
        if self is DType.BOOL:
            return pyarrow.bool_()
        if self is DType.DATETIME:
            return pyarrow.timestamp("ns")
        if self is DType.DATE:
            return pyarrow.date32()
        # numeric types: pyarrow exposes a factory named after the lowercased member name
        factory: Callable[[], pyarrow.DataType] = getattr(pyarrow, self.name.lower())
        return factory()

    def attrs_required_imports(self, build_options: Any = None) -> Set[str]:
        """Return the module names that must be imported to reference this type."""
        return {"datetime"} if self in (DType.DATE, DType.DATETIME) else set()