ygg 0.1.57__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/METADATA +1 -1
- ygg-0.1.60.dist-info/RECORD +74 -0
- yggdrasil/ai/__init__.py +2 -0
- yggdrasil/ai/session.py +89 -0
- yggdrasil/ai/sql_session.py +310 -0
- yggdrasil/databricks/__init__.py +0 -3
- yggdrasil/databricks/compute/cluster.py +68 -113
- yggdrasil/databricks/compute/command_execution.py +674 -0
- yggdrasil/databricks/compute/exceptions.py +19 -0
- yggdrasil/databricks/compute/execution_context.py +491 -282
- yggdrasil/databricks/compute/remote.py +4 -14
- yggdrasil/databricks/exceptions.py +10 -0
- yggdrasil/databricks/sql/__init__.py +0 -4
- yggdrasil/databricks/sql/engine.py +161 -173
- yggdrasil/databricks/sql/exceptions.py +9 -1
- yggdrasil/databricks/sql/statement_result.py +108 -120
- yggdrasil/databricks/sql/warehouse.py +331 -92
- yggdrasil/databricks/workspaces/io.py +89 -9
- yggdrasil/databricks/workspaces/path.py +120 -72
- yggdrasil/databricks/workspaces/workspace.py +214 -61
- yggdrasil/exceptions.py +7 -0
- yggdrasil/libs/databrickslib.py +23 -18
- yggdrasil/libs/extensions/spark_extensions.py +1 -1
- yggdrasil/libs/pandaslib.py +15 -6
- yggdrasil/libs/polarslib.py +49 -13
- yggdrasil/pyutils/__init__.py +1 -2
- yggdrasil/pyutils/callable_serde.py +12 -19
- yggdrasil/pyutils/exceptions.py +16 -0
- yggdrasil/pyutils/python_env.py +14 -13
- yggdrasil/pyutils/waiting_config.py +171 -0
- yggdrasil/types/cast/arrow_cast.py +3 -0
- yggdrasil/types/cast/pandas_cast.py +157 -169
- yggdrasil/types/cast/polars_cast.py +11 -43
- yggdrasil/types/dummy_class.py +81 -0
- yggdrasil/version.py +1 -1
- ygg-0.1.57.dist-info/RECORD +0 -66
- yggdrasil/databricks/ai/loki.py +0 -53
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/WHEEL +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.57.dist-info → ygg-0.1.60.dist-info}/top_level.txt +0 -0
- /yggdrasil/{databricks/ai/__init__.py → pyutils/mimetypes.py} +0 -0
yggdrasil/types/cast/pandas_cast.py
CHANGED

@@ -11,9 +11,10 @@ from .arrow_cast import (
 )
 from .cast_options import CastOptions
 from .registry import register_converter
-from ...libs.pandaslib import pandas, require_pandas
+from ...libs.pandaslib import pandas, PandasDataFrame, PandasSeries

 __all__ = [
+    "pandas_converter",
     "cast_pandas_series",
     "cast_pandas_dataframe",
     "arrow_array_to_pandas_series",
@@ -29,11 +30,6 @@ __all__ = [
 # ---------------------------------------------------------------------------

 if pandas is not None:
-    require_pandas()
-
-    PandasSeries = pandas.Series
-    PandasDataFrame = pandas.DataFrame
-
     def pandas_converter(*args, **kwargs):
         """Return a register_converter wrapper when pandas is available.

@@ -47,14 +43,6 @@ if pandas is not None:
         return register_converter(*args, **kwargs)

 else:
-    # Dummy types so annotations/decorators don't explode without pandas
-    class _PandasDummy:  # pragma: no cover - only used when pandas not installed
-        """Placeholder type for pandas symbols when pandas is unavailable."""
-        pass
-
-    PandasSeries = _PandasDummy
-    PandasDataFrame = _PandasDummy
-
     def pandas_converter(*_args, **_kwargs):  # pragma: no cover - no-op decorator
         """Return a no-op decorator when pandas is unavailable.

@@ -83,188 +71,188 @@ else:
 # pandas <-> Arrow via ArrowCastOptions
 # ---------------------------------------------------------------------------

+if pandas is not None:
+    @pandas_converter(PandasSeries, PandasSeries)
+    def cast_pandas_series(
+        series: PandasSeries,
+        options: Optional[CastOptions] = None,
+    ) -> PandasSeries:
+        """
+        Cast a pandas Series to a target Arrow type using Arrow casting rules.

-@pandas_converter(PandasSeries, PandasSeries)
-def cast_pandas_series(
-    series: "pandas.Series",
-    options: Optional[CastOptions] = None,
-) -> "pandas.Series":
-    """
-    Cast a pandas Series to a target Arrow type using Arrow casting rules.
+        The target type/field should be provided via `options` (e.g. options.target_schema
+        or options.target_field, depending on how ArrowCastOptions is defined).

-    The target type/field should be provided via `options` (e.g. options.target_schema
-    or options.target_field, depending on how ArrowCastOptions is defined).
+        Arrow does:
+        - type cast
+        - nullability enforcement
+        - default handling (via cast_arrow_array)
+        We then convert back to pandas and restore index/name.
+        """
+        options = CastOptions.check_arg(options)

-    Arrow does:
-    - type cast
-    - nullability enforcement
-    - default handling (via cast_arrow_array)
-    We then convert back to pandas and restore index/name.
-    """
-    options = CastOptions.check_arg(options)
+        arrow_array = pa.array(series, from_pandas=True)
+        casted = cast_arrow_array(arrow_array, options)

-    arrow_array = pa.array(series, from_pandas=True)
-    casted = cast_arrow_array(arrow_array, options)
+        result = casted.to_pandas()
+        result.index = series.index
+        result.name = series.name
+        return result

-    result = casted.to_pandas()
-    result.index = series.index
-    result.name = series.name
-    return result

+    @pandas_converter(PandasDataFrame, PandasDataFrame)
+    def cast_pandas_dataframe(
+        dataframe: PandasDataFrame,
+        options: Optional[CastOptions] = None,
+    ) -> PandasDataFrame:
+        """
+        Cast a pandas DataFrame to a target Arrow schema using Arrow casting rules.

-@pandas_converter(PandasDataFrame, PandasDataFrame)
-def cast_pandas_dataframe(
-    dataframe: "pandas.DataFrame",
-    options: Optional[CastOptions] = None,
-) -> "pandas.DataFrame":
-    """
-    Cast a pandas DataFrame to a target Arrow schema using Arrow casting rules.
+        Behavior is analogous to the Polars version, but we delegate casting to
+        `cast_arrow_table` and then adjust columns on the pandas side:

-    Behavior is analogous to the Polars version, but we delegate casting to
-    `cast_arrow_table` and then adjust columns on the pandas side:
+        - options.target_schema: Arrow schema / field used by cast_arrow_table
+        - options.allow_add_columns:
+          * False: result only has columns from the cast Arrow table
+          * True: extra pandas columns (not in the target schema / cast result)
+            are appended unchanged
+        """
+        options = CastOptions.check_arg(options)

-    - options.target_schema: Arrow schema / field used by cast_arrow_table
-    - options.allow_add_columns:
-      * False: result only has columns from the cast Arrow table
-      * True: extra pandas columns (not in the target schema / cast result)
-        are appended unchanged
-    """
-    options = CastOptions.check_arg(options)
+        original_index = dataframe.index

-    original_index = dataframe.index
+        arrow_table = pa.Table.from_pandas(dataframe, preserve_index=False)
+        casted_table = cast_arrow_tabular(arrow_table, options)

-    arrow_table = pa.Table.from_pandas(dataframe, preserve_index=False)
-    casted_table = cast_arrow_tabular(arrow_table, options)
+        result = casted_table.to_pandas()
+        result.index = original_index

-    result = casted_table.to_pandas()
-    result.index = original_index
+        if getattr(options, "allow_add_columns", False):
+            casted_cols = set(result.columns)
+            extra_cols = [col for col in dataframe.columns if col not in casted_cols]

-    if getattr(options, "allow_add_columns", False):
-        casted_cols = set(result.columns)
-        extra_cols = [col for col in dataframe.columns if col not in casted_cols]
+            if extra_cols:
+                extra_df = dataframe[extra_cols]
+                extra_df.index = result.index
+                result = pandas.concat([result, extra_df], axis=1)

-        if extra_cols:
-            extra_df = dataframe[extra_cols]
-            extra_df.index = result.index
-            result = pandas.concat([result, extra_df], axis=1)
+        return result

-    return result

+# ---------------------------------------------------------------------------
+# Arrow -> pandas
+# ---------------------------------------------------------------------------

-# ---------------------------------------------------------------------------
-# Arrow -> pandas
-# ---------------------------------------------------------------------------

+@pandas_converter(pa.Array, PandasSeries)
+@pandas_converter(pa.ChunkedArray, PandasSeries)
+def arrow_array_to_pandas_series(
+    array: pa.Array,
+    cast_options: Optional[CastOptions] = None,
+) -> PandasSeries:
+    """
+    Convert a pyarrow.Array (or ChunkedArray) to a pandas Series,
+    optionally applying Arrow casting via ArrowCastOptions before conversion.
+    """
+    opts = CastOptions.check_arg(cast_options)

-@pandas_converter(pa.Array, PandasSeries)
-@pandas_converter(pa.ChunkedArray, PandasSeries)
-def arrow_array_to_pandas_series(
-    array: pa.Array,
-    cast_options: Optional[CastOptions] = None,
-) -> "pandas.Series":
-    """
-    Convert a pyarrow.Array (or ChunkedArray) to a pandas Series,
-    optionally applying Arrow casting via ArrowCastOptions before conversion.
-    """
-    opts = CastOptions.check_arg(cast_options)
-
-    if isinstance(array, pa.ChunkedArray):
-        array = array.combine_chunks()
-
-    casted = cast_arrow_array(array, opts)
-    return casted.to_pandas()
-
-
-@pandas_converter(pa.Table, PandasDataFrame)
-def arrow_table_to_pandas_dataframe(
-    table: pa.Table,
-    cast_options: Optional[CastOptions] = None,
-) -> "pandas.DataFrame":
-    """
-    Convert a pyarrow.Table to a pandas DataFrame, optionally applying Arrow
-    casting rules first.
-    """
-    opts = CastOptions.check_arg(cast_options)
-
-    if opts.target_arrow_schema is not None:
-        table = cast_arrow_tabular(table, opts)
+    if isinstance(array, pa.ChunkedArray):
+        array = array.combine_chunks()

-    return table.to_pandas()
+    casted = cast_arrow_array(array, opts)
+    return casted.to_pandas()


-@pandas_converter(pa.RecordBatchReader, PandasDataFrame)
-def record_batch_reader_to_pandas_dataframe(
-    reader: pa.RecordBatchReader,
-    cast_options: Optional[CastOptions] = None,
-) -> "pandas.DataFrame":
-    """
-    Convert a pyarrow.RecordBatchReader to a pandas DataFrame.
+@pandas_converter(pa.Table, PandasDataFrame)
+def arrow_table_to_pandas_dataframe(
+    table: pa.Table,
+    cast_options: Optional[CastOptions] = None,
+) -> PandasDataFrame:
+    """
+    Convert a pyarrow.Table to a pandas DataFrame, optionally applying Arrow
+    casting rules first.
+    """
+    opts = CastOptions.check_arg(cast_options)

-    - If cast_options.target_schema is set, we first apply
-      `cast_arrow_record_batch_reader` and then collect to a Table and pandas DF.
-    """
-    opts = CastOptions.check_arg(cast_options)
+    if opts.target_arrow_schema is not None:
+        table = cast_arrow_tabular(table, opts)

-    if opts.target_arrow_schema is not None:
-        reader = cast_arrow_record_batch_reader(reader, opts)
+    return table.to_pandas()

-    batches = list(reader)
-    if not batches:
-        empty_table = pa.Table.from_arrays([], names=[])
-        return empty_table.to_pandas()

-    table = pa.Table.from_batches(batches)
-    return arrow_table_to_pandas_dataframe(table, opts)
+@pandas_converter(pa.RecordBatchReader, PandasDataFrame)
+def record_batch_reader_to_pandas_dataframe(
+    reader: pa.RecordBatchReader,
+    cast_options: Optional[CastOptions] = None,
+) -> PandasDataFrame:
+    """
+    Convert a pyarrow.RecordBatchReader to a pandas DataFrame.

+    - If cast_options.target_schema is set, we first apply
+      `cast_arrow_record_batch_reader` and then collect to a Table and pandas DF.
+    """
+    opts = CastOptions.check_arg(cast_options)

-# ---------------------------------------------------------------------------
-# pandas -> Arrow
-# ---------------------------------------------------------------------------
+    if opts.target_arrow_schema is not None:
+        reader = cast_arrow_record_batch_reader(reader, opts)

+    batches = list(reader)
+    if not batches:
+        empty_table = pa.Table.from_arrays([], names=[])
+        return empty_table.to_pandas()
+
+    table = pa.Table.from_batches(batches)
+    return arrow_table_to_pandas_dataframe(table, opts)
+
+
+# ---------------------------------------------------------------------------
+# pandas -> Arrow
+# ---------------------------------------------------------------------------
+
+
+@pandas_converter(PandasSeries, pa.Array)
+def pandas_series_to_arrow_array(
+    series: PandasSeries,
+    cast_options: Optional[CastOptions] = None,
+) -> pa.Array:
+    """
+    Convert a pandas Series to a pyarrow.Array, optionally applying Arrow
+    casting via ArrowCastOptions.
+    """
+    opts = CastOptions.check_arg(cast_options)
+
+    array = pa.array(series, from_pandas=True)
+    return cast_arrow_array(array, opts)
+
+
+@pandas_converter(PandasDataFrame, pa.Table)
+def pandas_dataframe_to_arrow_table(
+    dataframe: PandasDataFrame,
+    cast_options: Optional[CastOptions] = None,
+) -> pa.Table:
+    """
+    Convert a pandas DataFrame to a pyarrow.Table, optionally applying Arrow
+    casting rules via ArrowCastOptions.
+    """
+    opts = CastOptions.check_arg(cast_options)
+
+    table = pa.Table.from_pandas(dataframe, preserve_index=False)
+    return cast_arrow_tabular(table, opts)
+
+
+@pandas_converter(PandasDataFrame, pa.RecordBatchReader)
+def pandas_dataframe_to_record_batch_reader(
+    dataframe: PandasDataFrame,
+    cast_options: Optional[CastOptions] = None,
+) -> pa.RecordBatchReader:
+    """
+    Convert a pandas DataFrame to a pyarrow.RecordBatchReader, optionally
+    applying Arrow casting via ArrowCastOptions.
+    """
+    opts = CastOptions.check_arg(cast_options)
+
+    table = pa.Table.from_pandas(dataframe, preserve_index=False)
+    table = cast_arrow_tabular(table, opts)

-@pandas_converter(PandasSeries, pa.Array)
-def pandas_series_to_arrow_array(
-    series: "pandas.Series",
-    cast_options: Optional[CastOptions] = None,
-) -> pa.Array:
-    """
-    Convert a pandas Series to a pyarrow.Array, optionally applying Arrow
-    casting via ArrowCastOptions.
-    """
-    opts = CastOptions.check_arg(cast_options)
-
-    array = pa.array(series, from_pandas=True)
-    return cast_arrow_array(array, opts)
-
-
-@pandas_converter(PandasDataFrame, pa.Table)
-def pandas_dataframe_to_arrow_table(
-    dataframe: "pandas.DataFrame",
-    cast_options: Optional[CastOptions] = None,
-) -> pa.Table:
-    """
-    Convert a pandas DataFrame to a pyarrow.Table, optionally applying Arrow
-    casting rules via ArrowCastOptions.
-    """
-    opts = CastOptions.check_arg(cast_options)
-
-    table = pa.Table.from_pandas(dataframe, preserve_index=False)
-    return cast_arrow_tabular(table, opts)
-
-
-@pandas_converter(PandasDataFrame, pa.RecordBatchReader)
-def pandas_dataframe_to_record_batch_reader(
-    dataframe: "pandas.DataFrame",
-    cast_options: Optional[CastOptions] = None,
-) -> pa.RecordBatchReader:
-    """
-    Convert a pandas DataFrame to a pyarrow.RecordBatchReader, optionally
-    applying Arrow casting via ArrowCastOptions.
-    """
-    opts = CastOptions.check_arg(cast_options)
-
-    table = pa.Table.from_pandas(dataframe, preserve_index=False)
-    table = cast_arrow_tabular(table, opts)
-
-    batches = table.to_batches()
-    return pa.RecordBatchReader.from_batches(table.schema, batches)
+    batches = table.to_batches()
+    return pa.RecordBatchReader.from_batches(table.schema, batches)
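Note: the refactor above drops the locally defined `PandasSeries`/`PandasDataFrame` aliases (and the `_PandasDummy` fallback) in favor of aliases imported from `...libs.pandaslib`, and guards only the pandas-to-pandas converters behind `if pandas is not None:`. A minimal round-trip sketch of the new converters, assuming `CastOptions.check_arg(None)` falls back to permissive defaults (suggested by the `None` defaults in the signatures, but not confirmed by this diff):

    import pandas as pd

    from yggdrasil.types.cast.pandas_cast import (
        arrow_table_to_pandas_dataframe,
        pandas_dataframe_to_arrow_table,
    )

    df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})

    # pandas -> Arrow: pa.Table.from_pandas(...) followed by cast_arrow_tabular
    table = pandas_dataframe_to_arrow_table(df)

    # Arrow -> pandas: casts only if a target schema is set, then to_pandas()
    roundtrip = arrow_table_to_pandas_dataframe(table)
    assert list(roundtrip.columns) == ["id", "name"]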
yggdrasil/types/cast/polars_cast.py
CHANGED

@@ -1,18 +1,21 @@
 """Polars <-> Arrow casting helpers and converters."""

-from typing import Optional, Tuple, Union, Dict
+from typing import Optional, Tuple, Union, Dict

 import pyarrow as pa

 from .arrow_cast import (
     cast_arrow_array,
     cast_arrow_tabular,
-    cast_arrow_record_batch_reader,
+    cast_arrow_record_batch_reader,
+    is_arrow_type_binary_like,
+    is_arrow_type_string_like,
+    is_arrow_type_list_like,
 )
 from .cast_options import CastOptions
 from .registry import register_converter
 from ..python_defaults import default_arrow_scalar
-from ...libs.polarslib import polars
+from ...libs.polarslib import *

 __all__ = [
     "polars_converter",
@@ -41,14 +44,8 @@ __all__ = [
 # Polars type aliases + decorator wrapper (safe when Polars is missing)
 # ---------------------------------------------------------------------------

-if polars is not None:
-    PolarsSeries = polars.Series
-    PolarsExpr = polars.Expr
-    PolarsDataFrame = polars.DataFrame
-    PolarsField = polars.Field
-    PolarsSchema = polars.Schema
-    PolarsDataType = polars.DataType

+if polars is not None:
     # Primitive Arrow -> Polars dtype mapping (base, non-nested types).
     # These are Polars *dtype classes* (not instances), so they can be used
     # directly in schemas (e.g. pl.Struct({"a": pl.Int64})).
@@ -95,18 +92,6 @@ if polars is not None:
 else:
     ARROW_TO_POLARS = {}

-    # Dummy types so annotations/decorators don't explode without Polars
-    class _PolarsDummy:  # pragma: no cover - only used when Polars not installed
-        """Placeholder type for polars symbols when polars is unavailable."""
-        pass
-
-    PolarsSeries = _PolarsDummy
-    PolarsExpr = _PolarsDummy
-    PolarsDataFrame = _PolarsDummy
-    PolarsField = _PolarsDummy
-    PolarsSchema = _PolarsDummy
-    PolarsDataType = _PolarsDummy
-
     def polars_converter(*_args, **_kwargs):  # pragma: no cover - no-op decorator
         """Return a no-op decorator when polars is unavailable.

@@ -737,26 +722,6 @@ def arrow_field_to_polars_field(
     return built


-def _polars_base_type(pl_dtype: Any) -> Any:
-    """
-    Normalize a Polars dtype or dtype class to its base_type class,
-    so we can key into POLARS_BASE_TO_ARROW.
-    """
-    # dtype is an instance
-    base_method = getattr(pl_dtype, "base_type", None)
-    if callable(base_method):
-        return base_method()
-    # dtype is a class (e.g. pl.Int64)
-    try:
-        instance = pl_dtype()
-    except Exception:
-        return pl_dtype
-    base_method = getattr(instance, "base_type", None)
-    if callable(base_method):
-        return base_method()
-    return pl_dtype
-
-
 @polars_converter(PolarsDataType, pa.DataType)
 def polars_type_to_arrow_type(
     pl_type: PolarsDataType,
@@ -767,7 +732,10 @@ def polars_type_to_arrow_type(

     Handles primitives via POLARS_BASE_TO_ARROW and common nested/temporal types.
     """
-    base = _polars_base_type(pl_type)
+    try:
+        base = pl_type()
+    except Exception:
+        base = pl_type

     # Primitive base mapping
     existing = POLARS_BASE_TO_ARROW.get(base) or POLARS_BASE_TO_ARROW.get(type(pl_type))
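The removed `_polars_base_type` helper is replaced by an inline `try: pl_type() / except: pl_type` normalization inside `polars_type_to_arrow_type`. A standalone illustration of that pattern (not the library's code; it assumes only stock Polars dtype behavior):

    import polars as pl

    def normalize_dtype(pl_type):
        # pl.Int64 is a dtype *class*; pl.Int64() is an instance. Instantiating
        # when possible means a lookup table can be probed with the instance,
        # with type(pl_type) as the fallback key for the class spelling.
        try:
            return pl_type()
        except TypeError:
            return pl_type  # already an instance (instances are not callable)

    # both spellings normalize to equal values
    assert normalize_dtype(pl.Int64) == normalize_dtype(pl.Int64())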
yggdrasil/types/dummy_class.py
ADDED

@@ -0,0 +1,81 @@
+from abc import ABC, abstractmethod
+
+
+__all__ = ["DummyModuleClass"]
+
+
+class DummyModuleClass(ABC):
+    """
+    Hard-fail dummy proxy: any interaction raises, except a few safe internals.
+    """
+
+    @classmethod
+    @abstractmethod
+    def module_name(cls) -> str:
+        raise NotImplementedError
+
+    def _raise(self, action: str, name: str | None = None):
+        target = type(self).module_name()
+        extra = f" '{name}'" if name else ""
+        raise ModuleNotFoundError(
+            f"{type(self).__name__} is a dummy for missing optional dependency "
+            f"module '{target}'. Tried to {action}{extra}."
+        )
+
+    # --- attribute access / mutation ---
+    def __getattribute__(self, name: str):
+        # allow introspection / internals without blowing up
+        if name in {"module_name", "_raise", "__class__", "__dict__", "__repr__", "__str__", "__dir__"}:
+            return object.__getattribute__(self, name)
+        self._raise("access attribute", name)
+
+    def __getattr__(self, name: str):
+        self._raise("access attribute", name)
+
+    def __setattr__(self, name: str, value):
+        self._raise("set attribute", name)
+
+    def __delattr__(self, name: str):
+        self._raise("delete attribute", name)
+
+    def __dir__(self):
+        # show minimal surface
+        return ["module_name"]
+
+    def __repr__(self) -> str:
+        return f"<{type(self).__name__} dummy for '{type(self).module_name()}'>"
+
+    def __str__(self) -> str:
+        return self.__repr__()
+
+    # --- common "other" interactions ---
+    def __call__(self, *args, **kwargs):
+        self._raise("call module")
+
+    def __getitem__(self, key):
+        self._raise("index", str(key))
+
+    def __setitem__(self, key, value):
+        self._raise("set item", str(key))
+
+    def __delitem__(self, key):
+        self._raise("delete item", str(key))
+
+    def __iter__(self):
+        self._raise("iterate")
+
+    def __len__(self):
+        self._raise("get length")
+
+    def __contains__(self, item):
+        self._raise("check containment")
+
+    def __bool__(self):
+        self._raise("coerce to bool")
+
+
+# Example:
+# class PyArrowDummy(DummyModuleClass):
+#     @classmethod
+#     def module_name(cls) -> str:
+#         return "pyarrow"
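The new `DummyModuleClass` is an abstract proxy for optional dependencies: a subclass names the missing module, and any attribute access, call, or dunder interaction raises `ModuleNotFoundError` with that name. A small usage sketch (the `PolarsDummy` subclass is hypothetical, mirroring the commented `PyArrowDummy` example in the file):

    from yggdrasil.types.dummy_class import DummyModuleClass

    class PolarsDummy(DummyModuleClass):
        @classmethod
        def module_name(cls) -> str:
            return "polars"  # hypothetical stand-in for a missing 'polars'

    dummy = PolarsDummy()
    try:
        dummy.DataFrame  # any attribute access hard-fails
    except ModuleNotFoundError as exc:
        print(exc)
        # PolarsDummy is a dummy for missing optional dependency module
        # 'polars'. Tried to access attribute 'DataFrame'.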
yggdrasil/version.py
CHANGED

@@ -1 +1 @@
-__version__ = "0.1.57"
+__version__ = "0.1.60"
ygg-0.1.57.dist-info/RECORD
DELETED

@@ -1,66 +0,0 @@
-ygg-0.1.57.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
-yggdrasil/version.py,sha256=mM67BdyYZ17u9xAi4WRzFQM2e6yfmX4MPd36R3L920M,22
-yggdrasil/databricks/__init__.py,sha256=skctY2c8W-hI81upx9F_PWRe5ishL3hrdiTuizgDjdw,152
-yggdrasil/databricks/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yggdrasil/databricks/ai/loki.py,sha256=1AhMOquMTsZZGYw5cGoXn-QQhdBRMXM9ZRPEUAv4Y3k,1216
-yggdrasil/databricks/compute/__init__.py,sha256=NvdzmaJSNYY1uJthv1hHdBuNu3bD_-Z65DWnaJt9yXg,289
-yggdrasil/databricks/compute/cluster.py,sha256=YomLfvB0oxbgl6WDgBRxI1UXsxwlEbR6gq3FUbPHscY,44199
-yggdrasil/databricks/compute/execution_context.py,sha256=jIV6uru2NeX3O5lg-3KEqmXtLxxq45CFgkBQgQIIOHQ,23327
-yggdrasil/databricks/compute/remote.py,sha256=yicEhyQypssRa2ByscO36s3cBkEgORFsRME9aaq91Pc,3045
-yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
-yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
-yggdrasil/databricks/sql/__init__.py,sha256=Vp_1cFaX1l-JGzCknvkbiB8CBFX2fQbBNntIeVn3lEg,231
-yggdrasil/databricks/sql/engine.py,sha256=UzahfAFruv8jFLjfIeoLPDSvuGCkhRHCap9l42mrtps,50850
-yggdrasil/databricks/sql/exceptions.py,sha256=jHHcCc0Fv-rOKz5o_CyrR37QYIdeGZ2Hg4swfdYe7M0,1501
-yggdrasil/databricks/sql/statement_result.py,sha256=GZyVhhrUK5opNo-8HGqsMx0Rp9fa_0zqvn8McSHPQ8U,16310
-yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
-yggdrasil/databricks/sql/warehouse.py,sha256=1J0dyQLJb-OS1_1xU1eAVZ4CoL2-FhFeowKSvU3RzFc,9773
-yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
-yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
-yggdrasil/databricks/workspaces/io.py,sha256=hErGeSKJ9XpSUvlYAAckh_8IKQQmGeDOqbdl2rh9Fbs,33240
-yggdrasil/databricks/workspaces/path.py,sha256=KkvLFHrps3UFr4ogYdESbJHEMfQBcWfWfXjlrv_7rTU,55180
-yggdrasil/databricks/workspaces/path_kind.py,sha256=rhWe1ky7uPD0du0bZSv2S4fK4C5zWd7zAF3UeS2iiPU,283
-yggdrasil/databricks/workspaces/volumes_path.py,sha256=s8CA33cG3jpMVJy5MILLlkEBcFg_qInDCF2jozLj1Fg,2431
-yggdrasil/databricks/workspaces/workspace.py,sha256=Tl1pYzTGNpjsPmHCJ62HoJvdzHGiZb43vQxrI3Sk7js,25233
-yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
-yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
-yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
-yggdrasil/libs/databrickslib.py,sha256=vD6APmTkRsxeBemm-l84aG_1Vkeo2v_smeBM3Xh32OU,1095
-yggdrasil/libs/pandaslib.py,sha256=GoUjh9dxZAFLe9hs8-6RliLD3jsH_BexYW1w-8BZzb0,618
-yggdrasil/libs/polarslib.py,sha256=hnL8x6ygsyIoiJyIUMaeoji3fRzab4lBiHcMqa29C_Q,618
-yggdrasil/libs/sparklib.py,sha256=FQ3W1iz2EIpQreorOiQuFt15rdhq2QhGEAWp8Zrbl9A,10177
-yggdrasil/libs/extensions/__init__.py,sha256=mcXW5Li3Cbprbs4Ci-b5A0Ju0wmLcfvEiFusTx6xNjU,117
-yggdrasil/libs/extensions/polars_extensions.py,sha256=RTkGi8llhPJjX7x9egix7-yXWo2X24zIAPSKXV37SSA,12397
-yggdrasil/libs/extensions/spark_extensions.py,sha256=E64n-3SFTDgMuXwWitX6vOYP9ln2lpGKb0htoBLEZgc,16745
-yggdrasil/pyutils/__init__.py,sha256=tl-LapAc71TV7RMgf2ftKwrzr8iiLOGHeJgA3RvO93w,293
-yggdrasil/pyutils/callable_serde.py,sha256=1XckmFO-ThP0MedxgXwB71u9jWUuhM1btOzW9gJ8w9g,23117
-yggdrasil/pyutils/equality.py,sha256=Xyf8D1dLUCm3spDEir8Zyj7O4US_fBJwEylJCfJ9slI,3080
-yggdrasil/pyutils/exceptions.py,sha256=ssKNm-rjhavHUOZmGA7_1Gq9tSHDrb2EFI-cnBuWgng,3388
-yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
-yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
-yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
-yggdrasil/pyutils/python_env.py,sha256=Gh5geFK9ABpyWEfyegGUfIJUoPxKwcH0pqLBiMrW9Rw,51103
-yggdrasil/pyutils/retry.py,sha256=gXBtn1DdmIYIUmGKOUr8-SUT7MOu97LykN2YR4uocgc,11917
-yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
-yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
-yggdrasil/requests/session.py,sha256=SLnrgHY0Lby7ZxclRFUjHdfM8euN_8bSQEWl7TkJY2U,1461
-yggdrasil/types/__init__.py,sha256=CrLiDeYNM9fO975sE5ufeVKcy7Ca702IsaG2Pk8T3YU,139
-yggdrasil/types/file_format.py,sha256=yqAadZ5z6CrctsQO0ZmEY7eGXLbhBUnvvNOwkPSk0GU,133
-yggdrasil/types/python_arrow.py,sha256=mOhyecAxa5u8JWsyTO26OMOWimHHgwLKWlkNSAyIVas,25636
-yggdrasil/types/python_defaults.py,sha256=GO3hZBZcwRHs9qiXes75y8l5X00kZHTfEC7el_x73uw,10184
-yggdrasil/types/cast/__init__.py,sha256=Oft3pTs2bRM5hT7YqJAuOKTYYk-SACLaMOXUVdafy_I,311
-yggdrasil/types/cast/arrow_cast.py,sha256=_OMYc4t5GlgE4ztlWaCoK8Jnba09rgDbmHVP-QXhOL0,41523
-yggdrasil/types/cast/cast_options.py,sha256=nDaEvCCs7TBamhTWyDrYf3LVaBWzioIP2Q5_LXrChF4,15532
-yggdrasil/types/cast/pandas_cast.py,sha256=I3xu0sZ59ZbK3NDcQ2dslzdeKzhpFV5zR02ZEixd5hI,8713
-yggdrasil/types/cast/polars_cast.py,sha256=RILcbfL4o1XDMp5H-06c0BMrDal5pehOT7ACiItDB6E,28791
-yggdrasil/types/cast/polars_pandas_cast.py,sha256=CS0P7teVv15IdX5g7v40RfkH1VMg6b-HM0V_gOfacm8,5071
-yggdrasil/types/cast/registry.py,sha256=OOqIfbIjPH-a3figvu-zTvEtUDTEWhe2xIl3cCA4PRM,20941
-yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
-yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
-yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
-ygg-0.1.57.dist-info/METADATA,sha256=0VEcri5fh3BUYJxxhQ_icTZfhkry6KgEhbKQEEDrKJ4,18528
-ygg-0.1.57.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-ygg-0.1.57.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
-ygg-0.1.57.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
-ygg-0.1.57.dist-info/RECORD,,