ygg 0.1.30__tar.gz → 0.1.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. {ygg-0.1.30 → ygg-0.1.31}/PKG-INFO +1 -1
  2. {ygg-0.1.30 → ygg-0.1.31}/pyproject.toml +1 -1
  3. {ygg-0.1.30 → ygg-0.1.31}/src/ygg.egg-info/PKG-INFO +1 -1
  4. {ygg-0.1.30 → ygg-0.1.31}/src/ygg.egg-info/SOURCES.txt +4 -1
  5. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/jobs/config.py +2 -30
  6. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/sql/statement_result.py +1 -1
  7. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/sql/types.py +16 -0
  8. ygg-0.1.31/src/yggdrasil/databricks/workspaces/__init__.py +4 -0
  9. ygg-0.1.31/src/yggdrasil/databricks/workspaces/filesytem.py +161 -0
  10. ygg-0.1.31/src/yggdrasil/databricks/workspaces/io.py +745 -0
  11. ygg-0.1.31/src/yggdrasil/databricks/workspaces/path.py +1120 -0
  12. ygg-0.1.31/src/yggdrasil/databricks/workspaces/path_kind.py +10 -0
  13. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/workspaces/workspace.py +97 -150
  14. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/arrow_cast.py +9 -0
  15. ygg-0.1.30/src/yggdrasil/databricks/workspaces/__init__.py +0 -2
  16. ygg-0.1.30/src/yggdrasil/databricks/workspaces/databricks_path.py +0 -784
  17. {ygg-0.1.30 → ygg-0.1.31}/LICENSE +0 -0
  18. {ygg-0.1.30 → ygg-0.1.31}/README.md +0 -0
  19. {ygg-0.1.30 → ygg-0.1.31}/setup.cfg +0 -0
  20. {ygg-0.1.30 → ygg-0.1.31}/src/ygg.egg-info/dependency_links.txt +0 -0
  21. {ygg-0.1.30 → ygg-0.1.31}/src/ygg.egg-info/entry_points.txt +0 -0
  22. {ygg-0.1.30 → ygg-0.1.31}/src/ygg.egg-info/requires.txt +0 -0
  23. {ygg-0.1.30 → ygg-0.1.31}/src/ygg.egg-info/top_level.txt +0 -0
  24. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/__init__.py +0 -0
  25. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/__init__.py +0 -0
  26. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/compute/__init__.py +0 -0
  27. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/compute/cluster.py +0 -0
  28. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/compute/execution_context.py +0 -0
  29. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/compute/remote.py +0 -0
  30. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
  31. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/sql/__init__.py +0 -0
  32. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/sql/engine.py +0 -0
  33. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
  34. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/dataclasses/__init__.py +0 -0
  35. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/dataclasses/dataclass.py +0 -0
  36. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/libs/__init__.py +0 -0
  37. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/libs/databrickslib.py +0 -0
  38. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/libs/extensions/__init__.py +0 -0
  39. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
  40. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
  41. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/libs/pandaslib.py +0 -0
  42. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/libs/polarslib.py +0 -0
  43. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/libs/sparklib.py +0 -0
  44. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/pyutils/__init__.py +0 -0
  45. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/pyutils/callable_serde.py +0 -0
  46. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/pyutils/exceptions.py +0 -0
  47. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/pyutils/modules.py +0 -0
  48. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/pyutils/parallel.py +0 -0
  49. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/pyutils/python_env.py +0 -0
  50. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/pyutils/retry.py +0 -0
  51. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/requests/__init__.py +0 -0
  52. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/requests/msal.py +0 -0
  53. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/requests/session.py +0 -0
  54. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/__init__.py +0 -0
  55. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/__init__.py +0 -0
  56. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/cast_options.py +0 -0
  57. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
  58. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/polars_cast.py +0 -0
  59. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
  60. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/registry.py +0 -0
  61. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/spark_cast.py +0 -0
  62. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
  63. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
  64. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/libs.py +0 -0
  65. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/python_arrow.py +0 -0
  66. {ygg-0.1.30 → ygg-0.1.31}/src/yggdrasil/types/python_defaults.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.30
3
+ Version: 0.1.31
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  License: Apache License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ygg"
7
- version = "0.1.30"
7
+ version = "0.1.31"
8
8
  description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
9
9
  readme = { file = "README.md", content-type = "text/markdown" }
10
10
  license = { file = "LICENSE" }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.30
3
+ Version: 0.1.31
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  License: Apache License
@@ -21,7 +21,10 @@ src/yggdrasil/databricks/sql/exceptions.py
21
21
  src/yggdrasil/databricks/sql/statement_result.py
22
22
  src/yggdrasil/databricks/sql/types.py
23
23
  src/yggdrasil/databricks/workspaces/__init__.py
24
- src/yggdrasil/databricks/workspaces/databricks_path.py
24
+ src/yggdrasil/databricks/workspaces/filesytem.py
25
+ src/yggdrasil/databricks/workspaces/io.py
26
+ src/yggdrasil/databricks/workspaces/path.py
27
+ src/yggdrasil/databricks/workspaces/path_kind.py
25
28
  src/yggdrasil/databricks/workspaces/workspace.py
26
29
  src/yggdrasil/dataclasses/__init__.py
27
30
  src/yggdrasil/dataclasses/dataclass.py
@@ -2,11 +2,11 @@ import builtins
2
2
  import dataclasses
3
3
  import datetime as dt
4
4
  import inspect
5
+ import logging
5
6
  from dataclasses import dataclass, fields
6
7
  from enum import Enum
7
8
  from inspect import isclass
8
- from typing import Any, Dict, List, get_type_hints, Optional, get_origin
9
- import logging
9
+ from typing import Any, Dict, List, get_type_hints, get_origin
10
10
 
11
11
  from ...libs.sparklib import SparkSession
12
12
  from ...types.cast.registry import convert
@@ -308,31 +308,3 @@ class NotebookConfig:
308
308
  spark_session.conf.set("spark.sql.session.timeZone", "UTC")
309
309
 
310
310
  return cls.from_environment()
311
-
312
-
313
- class ExampleEnum(Enum):
314
- """Example enum for widget demonstration"""
315
- OPTION1 = "option1"
316
- OPTION2 = "option2"
317
- OPTION3 = "option3"
318
-
319
-
320
- @dataclass
321
- class CompleteNotebookConfig(NotebookConfig):
322
- """Example JobConfig with various field types to demonstrate widget handling"""
323
- # Basic types
324
- text_field: str
325
- integer_field: int = 42
326
- float_field: float = 3.14
327
- boolean_field: bool = True
328
-
329
- # Special types
330
- date_field: dt.date = dt.date(2023, 1, 1)
331
- datetime_field: dt.datetime = dt.datetime(2023, 1, 1, 12, 0, 0)
332
- enum_field: ExampleEnum = ExampleEnum.OPTION1
333
-
334
- # Collection types
335
- list_of_strings: List[str] = None # Will be displayed as multiselect
336
-
337
- # Optional fields
338
- optional_text: Optional[str] = None
@@ -96,7 +96,7 @@ class StatementResult:
96
96
  if self.is_spark_sql:
97
97
  return self._response
98
98
 
99
- if not self.done and self.statement_id and time.time() - self._response_refresh_time > delay:
99
+ if self.statement_id and not self.done and time.time() - self._response_refresh_time > delay:
100
100
  self.response = self.workspace.sdk().statement_execution.get_statement(self.statement_id)
101
101
 
102
102
  return self._response
@@ -102,6 +102,18 @@ def _split_top_level_commas(s: str):
102
102
  return parts
103
103
 
104
104
 
105
+ def _safe_bytes(obj):
106
+ if not isinstance(obj, bytes):
107
+ if not obj:
108
+ return b""
109
+
110
+ if not isinstance(obj, str):
111
+ obj = str(obj)
112
+
113
+ return obj.encode("utf-8")
114
+ return obj
115
+
116
+
105
117
  def parse_sql_type_to_pa(type_str: str) -> pa.DataType:
106
118
  """
107
119
  Adapted parser that:
@@ -170,6 +182,10 @@ def column_info_to_arrow_field(col: Union[SQLColumnInfo, CatalogColumnInfo]):
170
182
  if isinstance(col, CatalogColumnInfo):
171
183
  parsed = json.loads(col.type_json)
172
184
  md = parsed.get("metadata", {}) or {}
185
+ md = {
186
+ _safe_bytes(k): _safe_bytes(v)
187
+ for k, v in md.items()
188
+ }
173
189
  nullable = col.nullable
174
190
  elif isinstance(col, SQLColumnInfo):
175
191
  md = {}
@@ -0,0 +1,4 @@
1
+ from .workspace import *
2
+ from .path import *
3
+ from .io import *
4
+
@@ -0,0 +1,161 @@
1
+ __all__ = [
2
+ "DatabricksFileSystem",
3
+ "DatabricksFileSystemHandler"
4
+ ]
5
+
6
+ from typing import TYPE_CHECKING, Any, Union, List, Optional
7
+
8
+ from pyarrow import PythonFile
9
+ from pyarrow.fs import FileSystem, FileInfo, FileSelector, PyFileSystem, FileSystemHandler
10
+
11
+ if TYPE_CHECKING:
12
+ from ..workspaces.workspace import Workspace
13
+ from .path import DatabricksPath
14
+
15
+
16
class DatabricksFileSystemHandler(FileSystemHandler):
    """PyArrow ``FileSystemHandler`` backed by a Databricks :class:`Workspace`.

    Every path-like argument is coerced to a :class:`DatabricksPath` bound to
    ``self.workspace``; filesystem operations then delegate to that path
    object (``open``, ``mkdir``, ``rmdir``, ``ls``, ``file_info``, ...).
    """

    def __init__(
        self,
        workspace: "Workspace",
    ):
        super().__init__()
        # Workspace used to resolve and operate on every path this handler sees.
        self.workspace = workspace

    def __enter__(self):
        # Yield a connected clone so the original handler is left untouched.
        return self.connect(clone=True)

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Delegate teardown to the workspace's own context-manager exit.
        self.workspace.__exit__(exc_type, exc_val, exc_tb)

    def _parse_path(self, obj: Any) -> "DatabricksPath":
        """Coerce *obj* (str, URI, or DatabricksPath) into a DatabricksPath
        bound to this handler's workspace."""
        from .path import DatabricksPath

        return DatabricksPath.parse(obj, workspace=self.workspace)

    def connect(self, clone: bool = True):
        """Connect the underlying workspace.

        When *clone* is true, return a new handler wrapping the connected
        workspace; otherwise connect in place and return ``self``.
        """
        # BUG FIX: previously this line was ``self.connect(clone=clone)``,
        # which recursed unconditionally until RecursionError. The intent —
        # consistent with ``close``/``__exit__`` delegating to the workspace —
        # is to connect the *workspace*.
        workspace = self.workspace.connect(clone=clone)

        if clone:
            return DatabricksFileSystemHandler(
                workspace=workspace
            )

        self.workspace = workspace
        return self

    def close(self):
        # Release the underlying workspace connection.
        self.workspace.close()

    def copy_file(self, src, dest, *, chunk_size: int = 4 * 1024 * 1024):
        """Stream-copy *src* to *dest* in ``chunk_size`` byte chunks."""
        src = self._parse_path(src)
        dest = self._parse_path(dest)

        with src.open("rb") as r, dest.open("wb") as w:
            # Walrus loop avoids the read/test/break dance.
            while chunk := r.read(chunk_size):
                w.write(chunk)

    def create_dir(self, path, *args, recursive: bool = True, **kwargs):
        return self._parse_path(path).mkdir(parents=recursive)

    def delete_dir(self, path):
        return self._parse_path(path).rmdir(recursive=True)

    def delete_dir_contents(self, path, *args, accept_root_dir: bool = False, **kwargs):
        # NOTE(review): this removes *path* itself, not only its contents —
        # pyarrow's contract expects the directory to remain. Confirm
        # DatabricksPath.rmdir semantics or recreate the directory afterwards.
        return self._parse_path(path).rmdir(recursive=True)

    def delete_root_dir_contents(self):
        return self.delete_dir_contents("/", accept_root_dir=True)

    def delete_file(self, path):
        return self._parse_path(path).rmfile()

    def equals(self, other: FileSystem):
        return self == other

    def from_uri(self, uri):
        """Build a handler for the workspace referenced by *uri*."""
        uri = self._parse_path(uri)

        return self.__class__(
            workspace=uri.workspace
        )

    def get_file_info(
        self,
        paths_or_selector: Union[FileSelector, str, "DatabricksPath", List[Union[str, "DatabricksPath"]]]
    ) -> Union[FileInfo, List[FileInfo]]:
        """Return FileInfo for a single path, a FileSelector, or a list of paths."""
        from .path import DatabricksPath

        if isinstance(paths_or_selector, (str, DatabricksPath)):
            return self._parse_path(paths_or_selector).file_info

        if isinstance(paths_or_selector, FileSelector):
            return self.get_file_info_selector(paths_or_selector)

        # Assume any other input is an iterable of paths.
        return [
            self.get_file_info(obj)
            for obj in paths_or_selector
        ]

    def get_file_info_selector(
        self,
        selector: FileSelector
    ):
        """List FileInfo under ``selector.base_dir``, honoring recursion and
        the allow-not-found flag."""
        base_dir = self._parse_path(selector.base_dir)

        return [
            p.file_info
            for p in base_dir.ls(
                recursive=selector.recursive,
                allow_not_found=selector.allow_not_found
            )
        ]

    def get_type_name(self):
        return "dbfs"

    def move(self, src, dest):
        """Move by copy-then-delete; Databricks exposes no atomic rename here."""
        src = self._parse_path(src)

        src.copy_to(dest)

        src.remove(recursive=True)

    def normalize_path(self, path):
        return self._parse_path(path).full_path()

    def open(
        self,
        path,
        mode: str = "r+",
        encoding: Optional[str] = None,
    ):
        return self._parse_path(path).open(mode=mode, encoding=encoding, clone=False)

    def open_append_stream(self, path, compression='detect', buffer_size=None, metadata=None):
        # compression/buffer_size/metadata accepted for pyarrow API parity; unused.
        return self._parse_path(path).open(mode="ab")

    def open_input_file(self, path, mode: str = "rb", **kwargs):
        """Open a random-access input file wrapped in a pyarrow PythonFile."""
        buf = self._parse_path(path).open(mode=mode).connect(clone=True)

        return PythonFile(
            buf,
            mode=mode
        )

    def open_input_stream(self, path, compression='detect', buffer_size=None):
        return self._parse_path(path).open(mode="rb")

    def open_output_stream(self, path, compression='detect', buffer_size=None, metadata=None):
        return self._parse_path(path).open(mode="wb")
156
+
157
+
158
class DatabricksFileSystem(PyFileSystem):
    """PyArrow filesystem facade over a :class:`DatabricksFileSystemHandler`."""

    def __init__(self, handler):
        # PyFileSystem does all the work; this subclass exists to give the
        # Databricks-backed filesystem a distinct, importable type.
        super().__init__(handler)