ygg 0.1.30__py3-none-any.whl → 0.1.31__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.30
+Version: 0.1.31
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
@@ -1,4 +1,4 @@
-ygg-0.1.30.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ygg-0.1.31.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
 yggdrasil/__init__.py,sha256=6OPibApplA5TF4TeixkQO_qewpaAidYX-fSDvvKYcTI,91
 yggdrasil/databricks/__init__.py,sha256=aGVve5mpoQtxSK2nfzrexjRPoutCIyaOnKZijkG4_QE,92
 yggdrasil/databricks/compute/__init__.py,sha256=TVDwPmW2SOmHmnhzZhsvrWbrxZ_lEcgqe3l9BeB-oxM,218
@@ -6,15 +6,18 @@ yggdrasil/databricks/compute/cluster.py,sha256=P4e4eBSSSykO8lzrGLLfHEZukObqcnJgC
 yggdrasil/databricks/compute/execution_context.py,sha256=ecpjx1smGYu85Gv57SUM9T1Yo3n71gyRhAVqbe7X4-0,18677
 yggdrasil/databricks/compute/remote.py,sha256=f8IJpPmEjXQh8toaS5Kt8rxkeVph7Uxrcv7utKjnT1I,1347
 yggdrasil/databricks/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yggdrasil/databricks/jobs/config.py,sha256=8Slfw4Wl7vu0kIlaUUqVqjjOgPwuULoo0rroENCbC20,11494
+yggdrasil/databricks/jobs/config.py,sha256=FI2fiiXcxps-aRRfcqqQCqnnYj_TUHQNVJMBxQBe_OU,10693
 yggdrasil/databricks/sql/__init__.py,sha256=JZpQ9eCphDf1l4yzIZ7a7OLigxqXkqOgb0Mio7Rj09A,181
 yggdrasil/databricks/sql/engine.py,sha256=edNXVdAwjfGENegoeE8cq1mxelaqsUJFtf57puZFEpY,30894
 yggdrasil/databricks/sql/exceptions.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yggdrasil/databricks/sql/statement_result.py,sha256=o937Xx8XyXVPdnDPrYAkIjNfsyRpA6a5uzjxRpp7-1U,12909
-yggdrasil/databricks/sql/types.py,sha256=YgasSyq8sygk1h6ZOTcRwXAZWNKSuk-9g9VqlR8kJl4,5324
-yggdrasil/databricks/workspaces/__init__.py,sha256=tNNS3A_Pl9FYkQ8nGERhr4VF-hwKrvh8k1W8vTaR0uo,58
-yggdrasil/databricks/workspaces/databricks_path.py,sha256=Z0PqcJnDAwlfh4VXwfOfUhyzyTuGbZsW4mCoCBkZcCo,28284
-yggdrasil/databricks/workspaces/workspace.py,sha256=0MQAJSeJBKGHLAZyDoP5j5GLG4gjOUV4Jh2QvESOglk,19644
+yggdrasil/databricks/sql/statement_result.py,sha256=sqg3hLG-0vmMZSqQEf5QEbmb3CPvBwBKov7hPc_cPlo,12909
+yggdrasil/databricks/sql/types.py,sha256=nkoLNCan5tGRFq_rQJUz2mbz5bjr8effHvm4Gkuaf68,5661
+yggdrasil/databricks/workspaces/__init__.py,sha256=9Hk1csqJp2UiaAS5MZ7oz6KHh9JNo7A4Vo1xf61y2DM,68
+yggdrasil/databricks/workspaces/filesytem.py,sha256=e7oWaLdbhnl8evl2AjTUdhnITMem3ry2WmOFO4CQ02g,4761
+yggdrasil/databricks/workspaces/io.py,sha256=tGyjB5MWJK01lgeZC2my8mlyEEfpS9f-2MidTJNtlCI,21363
+yggdrasil/databricks/workspaces/path.py,sha256=MEPck7V0Pc92JGzgkFz5b6p8rH1m_otmewGttsAs_nQ,39091
+yggdrasil/databricks/workspaces/path_kind.py,sha256=S7f9jKLeceQNc6uZu9safASkZqHfKU-06Pq_3ixtfIU,174
+yggdrasil/databricks/workspaces/workspace.py,sha256=vVm4hD3_Bjv3qRGz6Q03LgNCaIithaJJCCAVVYTYWKM,17563
 yggdrasil/dataclasses/__init__.py,sha256=QVAvZnNl7gFYTLOVTfMkdQZf6o_WL8_UuDV1uTZ7Aeg,67
 yggdrasil/dataclasses/dataclass.py,sha256=ln-D1-bbiCLBd2khRMGs4dFoxzJEEGzHTKekWCnF2uk,5436
 yggdrasil/libs/__init__.py,sha256=88VTVKz8pnB8JSit6rlNzFm9O1ORyxqh0TQ7OBkXkQo,104
@@ -40,7 +43,7 @@ yggdrasil/types/libs.py,sha256=7-p0M4C6TnEWpUGf2nY4XshhJxtXOE_-bsYmJWU6jtk,227
 yggdrasil/types/python_arrow.py,sha256=1Ac1ZnEgmH4nLNBfrfbM6E9EfANGWtdANqlwMENQkTw,21408
 yggdrasil/types/python_defaults.py,sha256=7t0GMdgn1yJ-xhObeD3-vP091e6tdPl6qrm-wU7bbqw,8390
 yggdrasil/types/cast/__init__.py,sha256=2qiXTrUk7HRcdN8ZuDDUxSDCkqJfAfuwG0M1XA7rTzk,240
-yggdrasil/types/cast/arrow_cast.py,sha256=Eh0HTAEOSL5d_UAI0sX4qfBlTGSMkGMgDNVqd7875ec,38398
+yggdrasil/types/cast/arrow_cast.py,sha256=1brucYDBLJ9MUjQQ4ura2slIW2lUApzTc_7efq043fg,38664
 yggdrasil/types/cast/cast_options.py,sha256=R758Gfv3latG6hq_xrB4hbNPHPL51D-euwIvKEY8YYw,12149
 yggdrasil/types/cast/pandas_cast.py,sha256=-DlaZxGmisIklsZElUZI3g6nrj4R-xNDTI7q01IMB6g,7889
 yggdrasil/types/cast/polars_cast.py,sha256=Z48vvAVyRYZVYabiad22e3tn44tZ1iaSJgOOoJ1MorU,27523
@@ -49,8 +52,8 @@ yggdrasil/types/cast/registry.py,sha256=-88mq-U1pDSGbEC9PRY0zJCzloyBodXgeSRBPb6h
 yggdrasil/types/cast/spark_cast.py,sha256=IHthM78dugabGXxNNW9sSHn-olDwzXcFdIFcPo9IiXU,23021
 yggdrasil/types/cast/spark_pandas_cast.py,sha256=8PgJItF_XbyBcNuBnXkMQU3PBy3sAPEXZT9SXL2WbU4,4200
 yggdrasil/types/cast/spark_polars_cast.py,sha256=ba1UOvY1ouGCro1Np9slXmJ4TEyWnUtwVEAwxGvPLlk,8336
-ygg-0.1.30.dist-info/METADATA,sha256=BBuosBndVIX3PE-BsyjsuXVSjkHYG76cxUr-xXwD24o,19204
-ygg-0.1.30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ygg-0.1.30.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
-ygg-0.1.30.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
-ygg-0.1.30.dist-info/RECORD,,
+ygg-0.1.31.dist-info/METADATA,sha256=6xmpGqSGuLKSCgOJkXoMJz3gHI4494h9fCT8Mx5daEQ,19204
+ygg-0.1.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ygg-0.1.31.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ygg-0.1.31.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ygg-0.1.31.dist-info/RECORD,,
@@ -2,11 +2,11 @@ import builtins
 import dataclasses
 import datetime as dt
 import inspect
+import logging
 from dataclasses import dataclass, fields
 from enum import Enum
 from inspect import isclass
-from typing import Any, Dict, List, get_type_hints, Optional, get_origin
-import logging
+from typing import Any, Dict, List, get_type_hints, get_origin
 
 from ...libs.sparklib import SparkSession
 from ...types.cast.registry import convert
@@ -308,31 +308,3 @@ class NotebookConfig:
         spark_session.conf.set("spark.sql.session.timeZone", "UTC")
 
         return cls.from_environment()
-
-
-class ExampleEnum(Enum):
-    """Example enum for widget demonstration"""
-    OPTION1 = "option1"
-    OPTION2 = "option2"
-    OPTION3 = "option3"
-
-
-@dataclass
-class CompleteNotebookConfig(NotebookConfig):
-    """Example JobConfig with various field types to demonstrate widget handling"""
-    # Basic types
-    text_field: str
-    integer_field: int = 42
-    float_field: float = 3.14
-    boolean_field: bool = True
-
-    # Special types
-    date_field: dt.date = dt.date(2023, 1, 1)
-    datetime_field: dt.datetime = dt.datetime(2023, 1, 1, 12, 0, 0)
-    enum_field: ExampleEnum = ExampleEnum.OPTION1
-
-    # Collection types
-    list_of_strings: List[str] = None  # Will be displayed as multiselect
-
-    # Optional fields
-    optional_text: Optional[str] = None
@@ -96,7 +96,7 @@ class StatementResult:
         if self.is_spark_sql:
             return self._response
 
-        if not self.done and self.statement_id and time.time() - self._response_refresh_time > delay:
+        if self.statement_id and not self.done and time.time() - self._response_refresh_time > delay:
             self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
 
         return self._response
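
The guard above implements a throttled lazy refresh: the statement is re-fetched from the Databricks SDK only when a statement id exists, execution is not yet done, and more than `delay` seconds have elapsed since the last refresh; the reorder puts the cheap identity check first. A minimal standalone sketch of the same pattern (names are illustrative, not code from the package):

```python
import time

class ThrottledFetcher:
    """Re-fetch remote state at most once per `delay` seconds while unfinished."""

    def __init__(self, fetch, delay: float = 1.0):
        self._fetch = fetch            # callable returning the latest remote state
        self._delay = delay
        self._state = None
        self._last_refresh = 0.0       # epoch seconds of the last fetch

    def get(self, resource_id, done: bool):
        # Cheapest check first: without an id there is nothing to refresh.
        if resource_id and not done and time.time() - self._last_refresh > self._delay:
            self._state = self._fetch(resource_id)
            self._last_refresh = time.time()
        return self._state
```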
@@ -102,6 +102,18 @@ def _split_top_level_commas(s: str):
     return parts
 
 
+def _safe_bytes(obj):
+    if not isinstance(obj, bytes):
+        if not obj:
+            return b""
+
+        if not isinstance(obj, str):
+            obj = str(obj)
+
+        return obj.encode("utf-8")
+    return obj
+
+
 def parse_sql_type_to_pa(type_str: str) -> pa.DataType:
     """
     Adapted parser that:
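
For reference, this is how `_safe_bytes` behaves for the common input kinds, traced from the definition in the hunk above (illustrative only):

```python
# Assuming _safe_bytes as defined above:
_safe_bytes(b"raw")     # b"raw"     -- bytes pass through unchanged
_safe_bytes("comment")  # b"comment" -- str is UTF-8 encoded
_safe_bytes(None)       # b""        -- falsy non-bytes values collapse to empty bytes
_safe_bytes(42)         # b"42"      -- anything else goes through str() first
```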
@@ -170,6 +182,10 @@ def column_info_to_arrow_field(col: Union[SQLColumnInfo, CatalogColumnInfo]):
     if isinstance(col, CatalogColumnInfo):
         parsed = json.loads(col.type_json)
         md = parsed.get("metadata", {}) or {}
+        md = {
+            _safe_bytes(k): _safe_bytes(v)
+            for k, v in md.items()
+        }
         nullable = col.nullable
     elif isinstance(col, SQLColumnInfo):
         md = {}
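
Arrow stores field metadata as a bytes-to-bytes mapping, while the `metadata` object parsed from a Databricks catalog `type_json` can hold arbitrary JSON values; the comprehension above coerces both keys and values before they reach pyarrow. A minimal sketch of the constraint (standalone, not package code):

```python
import pyarrow as pa

# pyarrow accepts str/bytes metadata and stores it as bytes -> bytes;
# a non-string value such as an int or bool would be rejected.
field = pa.field("customer_id", pa.int64(), metadata={b"comment": b"primary key"})
assert field.metadata == {b"comment": b"primary key"}
```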
@@ -1,2 +1,4 @@
 from .workspace import *
-from .databricks_path import *
+from .path import *
+from .io import *
+
@@ -0,0 +1,161 @@
+__all__ = [
+    "DatabricksFileSystem",
+    "DatabricksFileSystemHandler"
+]
+
+from typing import TYPE_CHECKING, Any, Union, List, Optional
+
+from pyarrow import PythonFile
+from pyarrow.fs import FileSystem, FileInfo, FileSelector, PyFileSystem, FileSystemHandler
+
+if TYPE_CHECKING:
+    from ..workspaces.workspace import Workspace
+    from .path import DatabricksPath
+
+
+class DatabricksFileSystemHandler(FileSystemHandler):
+
+    def __init__(
+        self,
+        workspace: "Workspace",
+    ):
+        super().__init__()
+        self.workspace = workspace
+
+    def __enter__(self):
+        return self.connect(clone=True)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.workspace.__exit__(exc_type, exc_val, exc_tb)
+
+    def _parse_path(self, obj: Any) -> "DatabricksPath":
+        from .path import DatabricksPath
+
+        return DatabricksPath.parse(obj, workspace=self.workspace)
+
+    def connect(self, clone: bool = True):
+        workspace = self.workspace.connect(clone=clone)
+
+        if clone:
+            return DatabricksFileSystemHandler(
+                workspace=workspace
+            )
+
+        self.workspace = workspace
+        return self
+
+    def close(self):
+        self.workspace.close()
+
+    def copy_file(self, src, dest, *, chunk_size: int = 4 * 1024 * 1024):
+        src = self._parse_path(src)
+        dest = self._parse_path(dest)
+
+        with src.open("rb") as r, dest.open("wb") as w:
+            while True:
+                chunk = r.read(chunk_size)
+                if not chunk:
+                    break
+                w.write(chunk)
+
+    def create_dir(self, path, *args, recursive: bool = True, **kwargs):
+        return self._parse_path(path).mkdir(parents=recursive)
+
+    def delete_dir(self, path):
+        return self._parse_path(path).rmdir(recursive=True)
+
+    def delete_dir_contents(self, path, *args, accept_root_dir: bool = False, **kwargs):
+        return self._parse_path(path).rmdir(recursive=True)
+
+    def delete_root_dir_contents(self):
+        return self.delete_dir_contents("/", accept_root_dir=True)
+
+    def delete_file(self, path):
+        return self._parse_path(path).rmfile()
+
+    def equals(self, other: FileSystem):
+        return self == other
+
+    def from_uri(self, uri):
+        uri = self._parse_path(uri)
+
+        return self.__class__(
+            workspace=uri.workspace
+        )
+
+    def get_file_info(
+        self,
+        paths_or_selector: Union[FileSelector, str, "DatabricksPath", List[Union[str, "DatabricksPath"]]]
+    ) -> Union[FileInfo, List[FileInfo]]:
+        from .path import DatabricksPath
+
+        if isinstance(paths_or_selector, (str, DatabricksPath)):
+            result = self._parse_path(paths_or_selector).file_info
+
+            return result
+
+        if isinstance(paths_or_selector, FileSelector):
+            return self.get_file_info_selector(paths_or_selector)
+
+        return [
+            self.get_file_info(obj)
+            for obj in paths_or_selector
+        ]
+
+    def get_file_info_selector(
+        self,
+        selector: FileSelector
+    ):
+        base_dir = self._parse_path(selector.base_dir)
+
+        return [
+            p.file_info
+            for p in base_dir.ls(
+                recursive=selector.recursive,
+                allow_not_found=selector.allow_not_found
+            )
+        ]
+
+    def get_type_name(self):
+        return "dbfs"
+
+    def move(self, src, dest):
+        src = self._parse_path(src)
+
+        src.copy_to(dest)
+
+        src.remove(recursive=True)
+
+    def normalize_path(self, path):
+        return self._parse_path(path).full_path()
+
+    def open(
+        self,
+        path,
+        mode: str = "r+",
+        encoding: Optional[str] = None,
+    ):
+        return self._parse_path(path).open(mode=mode, encoding=encoding, clone=False)
+
+    def open_append_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+        return self._parse_path(path).open(mode="ab")
+
+    def open_input_file(self, path, mode: str = "rb", **kwargs):
+        buf = self._parse_path(path).open(mode=mode).connect(clone=True)
+
+        return PythonFile(
+            buf,
+            mode=mode
+        )
+
+    def open_input_stream(self, path, compression='detect', buffer_size=None):
+        return self._parse_path(path).open(mode="rb")
+
+    def open_output_stream(self, path, compression='detect', buffer_size=None, metadata=None):
+        return self._parse_path(path).open(mode="wb")
+
+
+class DatabricksFileSystem(PyFileSystem):
+
+    def __init__(self, handler):  # real signature unknown; restored from __doc__
+        super().__init__(handler)
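
The new module plugs the Databricks workspace into pyarrow's filesystem interface via `PyFileSystem`. A hypothetical usage sketch follows; the `Workspace` import path and its no-argument construction are assumptions based on the type hints in the diff, and authentication setup will vary:

```python
from yggdrasil.databricks.workspaces import Workspace  # assumed re-export from workspace.py
from yggdrasil.databricks.workspaces.filesytem import (
    DatabricksFileSystem,
    DatabricksFileSystemHandler,
)

# Assumption: Workspace picks up credentials from the environment/default profile.
workspace = Workspace()

fs = DatabricksFileSystem(DatabricksFileSystemHandler(workspace))

# Standard pyarrow.fs calls now route through the workspace.
info = fs.get_file_info("/Volumes/main/default/my_volume/data.parquet")
with fs.open_input_stream("/Volumes/main/default/my_volume/data.parquet") as f:
    header = f.read(4)
```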