ygg 0.1.30__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +31 -34
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +148 -1
  15. yggdrasil/databricks/sql/types.py +49 -1
  16. yggdrasil/databricks/workspaces/__init__.py +4 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +344 -0
  18. yggdrasil/databricks/workspaces/io.py +1123 -0
  19. yggdrasil/databricks/workspaces/path.py +1415 -0
  20. yggdrasil/databricks/workspaces/path_kind.py +13 -0
  21. yggdrasil/databricks/workspaces/workspace.py +298 -154
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +131 -0
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.30.dist-info/RECORD +0 -56
  58. yggdrasil/databricks/workspaces/databricks_path.py +0 -784
  59. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  60. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  61. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  62. {ygg-0.1.30.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,5 @@
1
+ """MSAL-backed authentication helpers for requests sessions."""
2
+
1
3
  # auth_session.py
2
4
  import os
3
5
  import time
@@ -27,6 +29,15 @@ __all__ = [
27
29
 
28
30
  @dataclass
29
31
  class MSALAuth:
32
+ """Configuration and token cache for MSAL client credential flows.
33
+
34
+ Args:
35
+ tenant_id: Azure tenant ID.
36
+ client_id: Azure application client ID.
37
+ client_secret: Azure application client secret.
38
+ authority: Optional authority URL override.
39
+ scopes: List of scopes to request.
40
+ """
30
41
  tenant_id: Optional[str] = None
31
42
  client_id: Optional[str] = None
32
43
  client_secret: Optional[str] = None
@@ -38,12 +49,34 @@ class MSALAuth:
38
49
  _access_token: Optional[str] = None
39
50
 
40
51
  def __setitem__(self, key, value):
52
+ """Set an attribute via mapping-style assignment.
53
+
54
+ Args:
55
+ key: Attribute name to set.
56
+ value: Value to assign.
57
+
58
+ Returns:
59
+ None.
60
+ """
41
61
  self.__setattr__(key, value)
42
62
 
43
63
  def __getitem__(self, item):
64
+ """Return attribute values via mapping-style access.
65
+
66
+ Args:
67
+ item: Attribute name to fetch.
68
+
69
+ Returns:
70
+ The attribute value.
71
+ """
44
72
  return getattr(self, item)
45
73
 
46
74
  def __post_init__(self):
75
+ """Populate defaults from environment variables and validate.
76
+
77
+ Returns:
78
+ None.
79
+ """
47
80
  self.tenant_id = self.tenant_id or os.environ.get("AZURE_TENANT_ID")
48
81
  self.client_id = self.client_id or os.environ.get("AZURE_CLIENT_ID")
49
82
  self.client_secret = self.client_secret or os.environ.get("AZURE_CLIENT_SECRET")
@@ -60,7 +93,11 @@ class MSALAuth:
60
93
  self._validate_config()
61
94
 
62
95
  def _validate_config(self):
63
- """Validate that all required configuration is present."""
96
+ """Validate that all required configuration is present.
97
+
98
+ Returns:
99
+ None.
100
+ """
64
101
  missing = []
65
102
 
66
103
  if not self.client_id:
@@ -81,6 +118,15 @@ class MSALAuth:
81
118
  env: Mapping = None,
82
119
  prefix: Optional[str] = None
83
120
  ) -> "MSALAuth":
121
+ """Return an MSALAuth built from environment variables if available.
122
+
123
+ Args:
124
+ env: Mapping to read variables from; defaults to os.environ.
125
+ prefix: Optional prefix for variable names.
126
+
127
+ Returns:
128
+ A configured MSALAuth instance or None.
129
+ """
84
130
  if not env:
85
131
  env = os.environ
86
132
  prefix = prefix or "AZURE_"
@@ -105,6 +151,14 @@ class MSALAuth:
105
151
  return None
106
152
 
107
153
  def export_to(self, to: dict = os.environ):
154
+ """Export the auth configuration to the provided mapping.
155
+
156
+ Args:
157
+ to: Mapping to populate with auth configuration values.
158
+
159
+ Returns:
160
+ None.
161
+ """
108
162
  for key, value in (
109
163
  ("AZURE_CLIENT_ID", self.client_id),
110
164
  ("AZURE_CLIENT_SECRET", self.client_secret),
@@ -116,6 +170,11 @@ class MSALAuth:
116
170
 
117
171
  @property
118
172
  def auth_app(self) -> ConfidentialClientApplication:
173
+ """Return or initialize the MSAL confidential client.
174
+
175
+ Returns:
176
+ MSAL confidential client instance.
177
+ """
119
178
  if not self._auth_app:
120
179
  self._auth_app = ConfidentialClientApplication(
121
180
  client_id=self.client_id,
@@ -127,19 +186,42 @@ class MSALAuth:
127
186
 
128
187
  @property
129
188
  def expires_in(self) -> float:
189
+ """Return the number of seconds since the token expiry timestamp.
190
+
191
+ Returns:
192
+ Seconds elapsed since expiry (negative if not expired).
193
+ """
130
194
  return time.time() - self.expires_at
131
195
 
132
196
  @property
133
197
  def expires_at(self) -> float:
198
+ """Ensure the token is fresh and return the expiry timestamp.
199
+
200
+ Returns:
201
+ Token expiration time as a Unix timestamp.
202
+ """
134
203
  self.refresh()
135
204
 
136
205
  return self._expires_at
137
206
 
138
207
  @property
139
208
  def expired(self) -> bool:
209
+ """Return True when the token is missing or past its expiry time.
210
+
211
+ Returns:
212
+ True if expired or missing; False otherwise.
213
+ """
140
214
  return not self._expires_at or time.time() >= self._expires_at
141
215
 
142
216
  def refresh(self, force: bool | None = None):
217
+ """Acquire or refresh the token if needed.
218
+
219
+ Args:
220
+ force: Force refresh even if not expired.
221
+
222
+ Returns:
223
+ The updated MSALAuth instance.
224
+ """
143
225
  if self.expired or force:
144
226
  app = self.auth_app
145
227
  result = app.acquire_token_for_client(scopes=self.scopes)
@@ -157,16 +239,32 @@ class MSALAuth:
157
239
 
158
240
  @property
159
241
  def access_token(self) -> str:
160
- """Return access token."""
242
+ """Return access token.
243
+
244
+ Returns:
245
+ Access token string.
246
+ """
161
247
  self.refresh()
162
248
  return self._access_token
163
249
 
164
250
  @property
165
251
  def authorization(self) -> str:
166
- """Return authorization token."""
252
+ """Return authorization token.
253
+
254
+ Returns:
255
+ Authorization header value.
256
+ """
167
257
  return f"Bearer {self.access_token}"
168
258
 
169
259
  def requests_session(self, **kwargs):
260
+ """Build a requests session that injects the MSAL authorization header.
261
+
262
+ Args:
263
+ **kwargs: Passed through to MSALSession.
264
+
265
+ Returns:
266
+ Configured MSALSession.
267
+ """
170
268
  return MSALSession(
171
269
  msal_auth=self,
172
270
  **kwargs
@@ -174,6 +272,11 @@ class MSALAuth:
174
272
 
175
273
 
176
274
  class MSALSession(YGGSession):
275
+ """YGGSession subclass that injects MSAL authorization headers.
276
+
277
+ Args:
278
+ YGGSession: Base retry-capable session.
279
+ """
177
280
  msal_auth: MSALAuth | None = None
178
281
 
179
282
  def __init__(
@@ -182,11 +285,29 @@ class MSALSession(YGGSession):
182
285
  *args,
183
286
  **kwargs: dict
184
287
  ):
288
+ """Initialize the session with optional MSAL auth configuration.
289
+
290
+ Args:
291
+ msal_auth: MSALAuth configuration for token injection.
292
+ *args: Positional args for YGGSession.
293
+ **kwargs: Keyword args for YGGSession.
294
+
295
+ Returns:
296
+ None.
297
+ """
185
298
  super().__init__(*args, **kwargs)
186
299
  self.msal_auth = msal_auth
187
300
 
188
301
 
189
302
  def prepare_request(self, request):
303
+ """Prepare the request with an Authorization header when needed.
304
+
305
+ Args:
306
+ request: requests.PreparedRequest to mutate.
307
+
308
+ Returns:
309
+ Prepared request.
310
+ """
190
311
  # called before sending; ensure header exists
191
312
  if self.msal_auth is not None:
192
313
  request.headers["Authorization"] = request.headers.get("Authorization", self.msal_auth.authorization)
@@ -1,3 +1,5 @@
1
+ """HTTP session helpers with retry-enabled defaults."""
2
+
1
3
  from typing import Optional, Dict
2
4
 
3
5
  from requests import Session
@@ -10,6 +12,11 @@ __all__ = [
10
12
 
11
13
 
12
14
  class YGGSession(Session):
15
+ """Requests session with preconfigured retry adapter support.
16
+
17
+ Args:
18
+ Session: Base requests session type.
19
+ """
13
20
  def __init__(
14
21
  self,
15
22
  num_retry: int = 4,
@@ -17,6 +24,17 @@ class YGGSession(Session):
17
24
  *args,
18
25
  **kwargs
19
26
  ):
27
+ """Initialize the session with retries and optional default headers.
28
+
29
+ Args:
30
+ num_retry: Number of retries for connection and read errors.
31
+ headers: Optional default headers to merge into the session.
32
+ *args: Additional positional arguments passed to Session.
33
+ **kwargs: Additional keyword arguments passed to Session.
34
+
35
+ Returns:
36
+ None.
37
+ """
20
38
  super(YGGSession, self).__init__()
21
39
 
22
40
  retry = Retry(
@@ -1,3 +1,5 @@
1
+ """Type utilities for Arrow inference and casting."""
2
+
1
3
  from .python_arrow import *
2
4
  from .python_defaults import *
3
5
  from .cast import *
@@ -1,3 +1,5 @@
1
+ """Casting utilities and converters across Arrow and engine types."""
2
+
1
3
  from .registry import *
2
4
  from .arrow_cast import *
3
5
  from .polars_cast import *
@@ -6,4 +8,3 @@ from .spark_cast import *
6
8
  from .spark_polars_cast import *
7
9
  from .spark_pandas_cast import *
8
10
  from .polars_pandas_cast import *
9
-
@@ -1,11 +1,15 @@
1
+ """Arrow casting helpers for arrays, tables, and schemas."""
2
+
1
3
  import dataclasses
2
4
  import enum
3
5
  import logging
4
6
  from dataclasses import is_dataclass
7
+ from functools import partial
5
8
  from typing import Optional, Union, List, Tuple, Any
6
9
 
7
10
  import pyarrow as pa
8
11
  import pyarrow.compute as pc
12
+ import pyarrow.dataset as pds
9
13
 
10
14
  from .cast_options import CastOptions
11
15
  from .registry import register_converter
@@ -451,6 +455,15 @@ def any_to_arrow_scalar(
451
455
  scalar: Any,
452
456
  options: Optional[CastOptions] = None,
453
457
  ) -> pa.Scalar:
458
+ """Convert a Python value to an Arrow scalar.
459
+
460
+ Args:
461
+ scalar: Input value.
462
+ options: Optional cast options.
463
+
464
+ Returns:
465
+ Arrow scalar.
466
+ """
454
467
  if isinstance(scalar, pa.Scalar):
455
468
  return cast_arrow_scalar(scalar, options)
456
469
 
@@ -491,6 +504,15 @@ def cast_arrow_scalar(
491
504
  scalar: pa.Scalar,
492
505
  options: Optional[CastOptions] = None,
493
506
  ) -> pa.Scalar:
507
+ """Cast an Arrow scalar to the target Arrow field.
508
+
509
+ Args:
510
+ scalar: Arrow scalar to cast.
511
+ options: Optional cast options.
512
+
513
+ Returns:
514
+ Casted Arrow scalar.
515
+ """
494
516
  options = CastOptions.check_arg(options)
495
517
  target_field = options.target_field
496
518
 
@@ -740,6 +762,28 @@ def cast_arrow_tabular(
740
762
  return data.__class__.from_arrays(all_arrays, schema=target_arrow_schema)
741
763
 
742
764
 
765
@register_converter(pds.Dataset, pds.Dataset)
def cast_arrow_dataset(data: pds.Dataset, options: Optional[CastOptions] = None) -> pds.Dataset:
    """Cast a dataset to the target schema carried by ``options``.

    The dataset is scanned in batches, each batch is cast with
    ``cast_arrow_tabular``, and the results are wrapped in a new in-memory
    dataset.

    Args:
        data: Arrow dataset to cast.
        options: Optional cast options. When ``None`` the dataset is
            returned unchanged.

    Returns:
        The input dataset when ``options`` is ``None``; otherwise a dataset
        built from the casted batches.
    """
    if options is None:
        return data

    # ``pds.dataset`` dispatches on concrete tables, batches, lists and
    # tuples; current pyarrow releases reject a lazy ``map`` object with
    # ``TypeError``, so the casted batches are materialized first.
    casted = [
        cast_arrow_tabular(batch, options=options)
        for batch in data.to_batches(
            batch_size=256 * 1024,
            memory_pool=options.arrow_memory_pool,
        )
    ]

    if not casted:
        # An empty list carries no schema for the new dataset; cast an empty
        # table instead so the result still exposes the casted schema.
        casted = [cast_arrow_tabular(data.schema.empty_table(), options=options)]

    return pds.dataset(casted)
785
+
786
+
743
787
  @register_converter(pa.RecordBatchReader, pa.RecordBatchReader)
744
788
  def cast_arrow_record_batch_reader(
745
789
  data: pa.RecordBatchReader,
@@ -756,6 +800,11 @@ def cast_arrow_record_batch_reader(
756
800
  return data
757
801
 
758
802
  def casted_batches():
803
+ """Yield casted batches from a RecordBatchReader.
804
+
805
+ Yields:
806
+ Casted RecordBatch instances.
807
+ """
759
808
  for batch in data:
760
809
  yield cast_arrow_tabular(batch, options)
761
810
 
@@ -769,6 +818,15 @@ def any_to_arrow_array(
769
818
  obj: Any,
770
819
  options: Optional[CastOptions] = None,
771
820
  ) -> pa.Array:
821
+ """Convert array-like input into an Arrow array.
822
+
823
+ Args:
824
+ obj: Array-like input.
825
+ options: Optional cast options.
826
+
827
+ Returns:
828
+ Arrow array.
829
+ """
772
830
  options = CastOptions.check_arg(options)
773
831
  arrow_array = None
774
832
 
@@ -845,6 +903,15 @@ def pylist_to_record_batch(
845
903
  data: list,
846
904
  options: Optional[CastOptions] = None,
847
905
  ) -> pa.RecordBatch:
906
+ """Convert a list of rows into a RecordBatch.
907
+
908
+ Args:
909
+ data: List of row objects.
910
+ options: Optional cast options.
911
+
912
+ Returns:
913
+ Arrow RecordBatch.
914
+ """
848
915
  options = CastOptions.check_arg(options)
849
916
 
850
917
  array: Union[pa.Array, pa.StructArray] = any_to_arrow_array(data, options)
@@ -1095,6 +1162,43 @@ def record_batch_reader_to_record_batch(
1095
1162
  return table_to_record_batch(table, options)
1096
1163
 
1097
1164
 
1165
@register_converter(pds.Dataset, pa.Table)
def arrow_dataset_to_table(
    data: pds.Dataset,
    options: Optional[CastOptions] = None,
) -> pa.Table:
    """Load a dataset into a Table and run it through ``cast_arrow_tabular``.

    Args:
        data: Arrow dataset to read.
        options: Optional cast options forwarded to ``cast_arrow_tabular``.

    Returns:
        The result of casting the fully loaded table.
    """
    # to_table() reads the whole dataset before the cast is applied.
    return cast_arrow_tabular(data.to_table(), options)
1181
+
1182
+
1183
@register_converter(pa.Table, pds.Dataset)
@register_converter(pa.RecordBatch, pds.Dataset)
def arrow_tabular_to_dataset(
    data: Union[pa.Table, pa.RecordBatch],
    options: Optional[CastOptions] = None,
) -> pds.Dataset:
    """Cast Arrow tabular data and wrap the result in a dataset.

    Args:
        data: Table or RecordBatch to convert.
        options: Optional cast options forwarded to ``cast_arrow_tabular``.

    Returns:
        A dataset built from the single casted table or batch.
    """
    # The annotation previously read ``pa.Field``; the function has always
    # returned the object produced by ``pds.dataset``.
    casted = cast_arrow_tabular(data, options)
    return pds.dataset([casted])
1200
+
1201
+
1098
1202
  # ---------------------------------------------------------------------------
1099
1203
  # Field / Schema converters
1100
1204
  # ---------------------------------------------------------------------------
@@ -1145,6 +1249,15 @@ def arrow_schema_to_field(
1145
1249
  data: pa.Schema,
1146
1250
  options: Optional[CastOptions] = None,
1147
1251
  ) -> pa.Field:
1252
+ """Wrap an Arrow schema as a struct field.
1253
+
1254
+ Args:
1255
+ data: Arrow schema.
1256
+ options: Optional cast options.
1257
+
1258
+ Returns:
1259
+ Arrow field.
1260
+ """
1148
1261
  dtype = pa.struct(list(data))
1149
1262
  md = dict(data.metadata or {})
1150
1263
  name = md.setdefault(b"name", b"root")
@@ -1157,6 +1270,15 @@ def arrow_field_to_schema(
1157
1270
  data: pa.Field,
1158
1271
  options: Optional[CastOptions] = None,
1159
1272
  ) -> pa.Schema:
1273
+ """Return a schema view of an Arrow field.
1274
+
1275
+ Args:
1276
+ data: Arrow field.
1277
+ options: Optional cast options.
1278
+
1279
+ Returns:
1280
+ Arrow schema.
1281
+ """
1160
1282
  md = dict(data.metadata or {})
1161
1283
  md[b"name"] = data.name.encode()
1162
1284
 
@@ -1172,4 +1294,13 @@ def arrow_tabular_to_field(
1172
1294
  data: Union[pa.Table, pa.RecordBatch, pa.RecordBatchReader],
1173
1295
  options: Optional[CastOptions] = None,
1174
1296
  ) -> pa.Field:
1297
+ """Return a field representing the schema of tabular data.
1298
+
1299
+ Args:
1300
+ data: Arrow table/batch/reader.
1301
+ options: Optional cast options.
1302
+
1303
+ Returns:
1304
+ Arrow field.
1305
+ """
1175
1306
  return arrow_schema_to_field(data.schema, options)