ygg 0.1.60__py3-none-any.whl → 0.1.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/METADATA +2 -2
- {ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/RECORD +19 -19
- yggdrasil/ai/session.py +1 -3
- yggdrasil/databricks/sql/engine.py +24 -12
- yggdrasil/databricks/sql/warehouse.py +8 -0
- yggdrasil/databricks/workspaces/io.py +108 -43
- yggdrasil/databricks/workspaces/path.py +5 -39
- yggdrasil/databricks/workspaces/workspace.py +6 -10
- yggdrasil/libs/databrickslib.py +2 -3
- yggdrasil/pyutils/modules.py +6 -7
- yggdrasil/pyutils/python_env.py +3 -9
- yggdrasil/requests/msal.py +9 -96
- yggdrasil/types/file_format.py +6 -2
- yggdrasil/types/python_defaults.py +92 -76
- yggdrasil/version.py +1 -1
- {ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/WHEEL +0 -0
- {ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/entry_points.txt +0 -0
- {ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/licenses/LICENSE +0 -0
- {ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/top_level.txt +0 -0
{ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ygg
-Version: 0.1.
+Version: 0.1.65
 Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
 Author: Yggdrasil contributors
 License: Apache License
@@ -207,7 +207,7 @@ License: Apache License
 
 Project-URL: Homepage, https://github.com/Platob/Yggdrasil
 Project-URL: Repository, https://github.com/Platob/Yggdrasil
-Project-URL: Documentation, https://github.com/Platob/Yggdrasil
+Project-URL: Documentation, https://github.com/Platob/Yggdrasil
 Keywords: arrow,polars,pandas,spark,databricks,typing,dataclass,serialization
 Classifier: Development Status :: 3 - Alpha
 Classifier: Programming Language :: Python

{ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
-ygg-0.1.
+ygg-0.1.65.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
 yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
 yggdrasil/exceptions.py,sha256=NEpbDFn-8ZRsLiEgJicCwrTHNMWAGtdrTJzosfAeVJo,82
-yggdrasil/version.py,sha256=
+yggdrasil/version.py,sha256=P0GENqTQLndQpX5Tkuaob2sv-oNWWzdsMw2PdmqDlFY,22
 yggdrasil/ai/__init__.py,sha256=YEOVsyuvEOvPaZT8XN9xNysS_WOpHTbKgXgnA8up7x0,52
-yggdrasil/ai/session.py,sha256=
+yggdrasil/ai/session.py,sha256=10ATAnw8FOCpfIg9sNR4meki_MRckUzKZ9Uft4IXwLA,2515
 yggdrasil/ai/sql_session.py,sha256=n92tQjHUBIey6c3EJProiEEwfAtQm07Dtmei4WXzeG0,10812
 yggdrasil/databricks/__init__.py,sha256=0GRBP930ManOvyo-Y5E7bz7F2msnvU677OH6rxzPwd8,87
 yggdrasil/databricks/exceptions.py,sha256=-ZULt0wD5_Rxww11nk4Z46DvS5j18RdKR5ISmbQfUQA,142
@@ -16,22 +16,22 @@ yggdrasil/databricks/compute/remote.py,sha256=sF99i7GXZcC0GiNgO9VO0I26rFbrtnDhK9
 yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
 yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
 yggdrasil/databricks/sql/__init__.py,sha256=PetgRp1jEj5K3TgN09FwNUVjVN8YYuGq0cDIOTqsbns,144
-yggdrasil/databricks/sql/engine.py,sha256=
+yggdrasil/databricks/sql/engine.py,sha256=nFWeegs91CtjCLzxgZsJwOlAXNVI1v_lfecuFVfKFFY,49979
 yggdrasil/databricks/sql/exceptions.py,sha256=srMR3Y9LQm45rkyxfyCgpgcoGtRRvGKWBEoUHf4kxsg,1762
 yggdrasil/databricks/sql/statement_result.py,sha256=01DzFX1bGDIGHj0OW2ngfVVJ1w1KHlZEfAI934E35CU,15549
 yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
-yggdrasil/databricks/sql/warehouse.py,sha256=
+yggdrasil/databricks/sql/warehouse.py,sha256=bCMWAci_E7pxIH1-9qSgwzpLztLsyiBFjZgME9dOXC8,18971
 yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
 yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
-yggdrasil/databricks/workspaces/io.py,sha256=
-yggdrasil/databricks/workspaces/path.py,sha256=
+yggdrasil/databricks/workspaces/io.py,sha256=RdgN5lmEYNF5phPRkRMCVHbUl-t3ZUGkKbzgYSTKpII,37420
+yggdrasil/databricks/workspaces/path.py,sha256=k3UB0LhF4hQI-Iza50D5dVjhqNsAdP4KabKiWK7bTWM,55775
 yggdrasil/databricks/workspaces/path_kind.py,sha256=rhWe1ky7uPD0du0bZSv2S4fK4C5zWd7zAF3UeS2iiPU,283
 yggdrasil/databricks/workspaces/volumes_path.py,sha256=s8CA33cG3jpMVJy5MILLlkEBcFg_qInDCF2jozLj1Fg,2431
-yggdrasil/databricks/workspaces/workspace.py,sha256=
+yggdrasil/databricks/workspaces/workspace.py,sha256=f8Ihv3fqo6YYno1yvkXnMpZhQQWwMkklhg-C9MSVyrE,30103
 yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
 yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
 yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
-yggdrasil/libs/databrickslib.py,sha256=
+yggdrasil/libs/databrickslib.py,sha256=Y99ARtrVKVBTH0qZ0njYr1Oa_757wtsVY4ywH07IdQ4,1109
 yggdrasil/libs/pandaslib.py,sha256=_U4sdFvLAFD16_65RG-RFmcx4c3fvVnALESFaAlT71M,887
 yggdrasil/libs/polarslib.py,sha256=WnnERtMTl__ZPidcZkoV7mb8-c680zcAnJgzAoD3ZE8,1437
 yggdrasil/libs/sparklib.py,sha256=FQ3W1iz2EIpQreorOiQuFt15rdhq2QhGEAWp8Zrbl9A,10177
@@ -44,19 +44,19 @@ yggdrasil/pyutils/equality.py,sha256=Xyf8D1dLUCm3spDEir8Zyj7O4US_fBJwEylJCfJ9slI
 yggdrasil/pyutils/exceptions.py,sha256=1c0xxFvGML5gkDPGzD_Tgw1ff9bGMVygH8ASgeoII2E,3889
 yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
 yggdrasil/pyutils/mimetypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-yggdrasil/pyutils/modules.py,sha256=
+yggdrasil/pyutils/modules.py,sha256=a0YWunsuA-D-Ho41LrwkN6o7e88NMk7oOzrSzlG0kPQ,11488
 yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
-yggdrasil/pyutils/python_env.py,sha256=
+yggdrasil/pyutils/python_env.py,sha256=d9s6i_fXz6j4f5BnigKxR0dEUJ-4BmqjBD2u2ybX29U,51025
 yggdrasil/pyutils/retry.py,sha256=gXBtn1DdmIYIUmGKOUr8-SUT7MOu97LykN2YR4uocgc,11917
 yggdrasil/pyutils/waiting_config.py,sha256=WiMOiKyGR5iKr83YK4dljn7OCaDpxXMUx8cz-bUNGMg,6255
 yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
-yggdrasil/requests/msal.py,sha256=
+yggdrasil/requests/msal.py,sha256=XSuKsxEIApfygiWOBBOok_trQk3eeNb5P0f3RAUrtss,6666
 yggdrasil/requests/session.py,sha256=SLnrgHY0Lby7ZxclRFUjHdfM8euN_8bSQEWl7TkJY2U,1461
 yggdrasil/types/__init__.py,sha256=CrLiDeYNM9fO975sE5ufeVKcy7Ca702IsaG2Pk8T3YU,139
 yggdrasil/types/dummy_class.py,sha256=XXM3_ljL4XfY5LeF-WTj-myqHaKAUmWZ23cPDrXAnBM,2327
-yggdrasil/types/file_format.py,sha256=
+yggdrasil/types/file_format.py,sha256=P-3JTa9FzhHj-ndWMGgsF0zxlR_V2q3a_p2R2CwSoRs,273
 yggdrasil/types/python_arrow.py,sha256=mOhyecAxa5u8JWsyTO26OMOWimHHgwLKWlkNSAyIVas,25636
-yggdrasil/types/python_defaults.py,sha256=
+yggdrasil/types/python_defaults.py,sha256=kT7vuNDxzP_5tsy0aOkzVh1sZN7rKR7mky9nrYiFkl0,11063
 yggdrasil/types/cast/__init__.py,sha256=Oft3pTs2bRM5hT7YqJAuOKTYYk-SACLaMOXUVdafy_I,311
 yggdrasil/types/cast/arrow_cast.py,sha256=IZstOcHjLKPy62TFGgjMSW3ttPGt3hMi6RmDw-92T0E,41623
 yggdrasil/types/cast/cast_options.py,sha256=nDaEvCCs7TBamhTWyDrYf3LVaBWzioIP2Q5_LXrChF4,15532
@@ -67,8 +67,8 @@ yggdrasil/types/cast/registry.py,sha256=OOqIfbIjPH-a3figvu-zTvEtUDTEWhe2xIl3cCA4
 yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
 yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
 yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
-ygg-0.1.
+ygg-0.1.65.dist-info/METADATA,sha256=QQp-Hf_yN9HVD8Cjs6xgHzIaJ1g0GTJ7RODqgthqkh0,18506
+ygg-0.1.65.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ygg-0.1.65.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ygg-0.1.65.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ygg-0.1.65.dist-info/RECORD,,

yggdrasil/ai/session.py
CHANGED
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from typing import Dict, List, Optional
 
-from 
+from ..types.dummy_class import DummyModuleClass
 
 try:
     from openai import OpenAI
@@ -19,8 +19,6 @@ __all__ = ["AISession"]
 class AISession(ABC):
     api_key: str
     base_url: str
-
-    # Gemini default (via OpenAI-compatible gateway)
     model: str = "gemini-2.5-flash"
 
     client: OpenAI = field(init=False)

yggdrasil/databricks/sql/engine.py
CHANGED
@@ -17,7 +17,7 @@ import random
 import string
 import time
 from threading import Thread
-from typing import Optional, Union, Any, Dict, List, Literal
+from typing import Optional, Union, Any, Dict, List, Literal, TYPE_CHECKING
 
 import pyarrow as pa
 import pyarrow.dataset as pds
@@ -26,11 +26,10 @@ from .statement_result import StatementResult
 from .types import column_info_to_arrow_field
 from .warehouse import SQLWarehouse
 from ..workspaces import WorkspaceService, DatabricksPath
-from ...ai.sql_session import SQLAISession, SQLFlavor
 from ...libs.databrickslib import databricks_sdk, DatabricksDummyClass
 from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
 from ...pyutils.waiting_config import WaitingConfigArg
-from ...types import is_arrow_type_string_like, is_arrow_type_binary_like
+from ...types import is_arrow_type_string_like, is_arrow_type_binary_like, cast_arrow_tabular
 from ...types.cast.cast_options import CastOptions
 from ...types.cast.registry import convert
 from ...types.cast.spark_cast import cast_spark_dataframe
@@ -63,6 +62,10 @@ if pyspark is not None:
     import pyspark.sql.functions as F
 
 
+if TYPE_CHECKING:
+    from ...ai.sql_session import SQLAISession, SQLFlavor
+
+
 __all__ = [
     "SQLEngine",
     "StatementResult"
@@ -101,7 +104,7 @@ class SQLEngine(WorkspaceService):
     schema_name: Optional[str] = None
 
     _warehouse: Optional[SQLWarehouse] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
-    _ai_session: Optional[SQLAISession] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
+    _ai_session: Optional["SQLAISession"] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
 
     def table_full_name(
         self,
@@ -198,8 +201,13 @@ class SQLEngine(WorkspaceService):
     def ai_session(
         self,
         model: str = "databricks-gemini-2-5-pro",
-        flavor: SQLFlavor =
+        flavor: Optional["SQLFlavor"] = None
     ):
+        from ...ai.sql_session import SQLAISession, SQLFlavor
+
+        if flavor is None:
+            flavor = SQLFlavor.DATABRICKS
+
         return SQLAISession(
             model=model,
             api_key=self.workspace.current_token(),
@@ -224,7 +232,7 @@ class SQLEngine(WorkspaceService):
         catalog_name: Optional[str] = None,
         schema_name: Optional[str] = None,
         wait: Optional[WaitingConfigArg] = True
-    ) ->
+    ) -> StatementResult:
        """Execute a SQL statement via Spark or Databricks SQL Statement Execution API.
 
        Engine resolution:
@@ -504,10 +512,13 @@
             logger.exception("Failed to drop table %s after auto creation error", location)
             raise
 
-
-
-
-
+        cast_options = CastOptions.check_arg(options=cast_options, target_field=existing_schema)
+
+        if isinstance(data, (pa.Table, pa.RecordBatch)):
+            data_tbl = cast_arrow_tabular(data, options=cast_options)
+        else:
+            data_tbl = convert(data, pa.Table, options=cast_options)
+
         num_rows = data_tbl.num_rows
 
         logger.debug(
@@ -524,7 +535,8 @@
             catalog_name=catalog_name,
             schema_name=schema_name,
             volume_name="tmp",
-            extension="parquet"
+            extension="parquet",
+            max_lifetime=3600,
         ) if temp_volume_path is None else DatabricksPath.parse(obj=temp_volume_path, workspace=connected.workspace)
 
         logger.debug("Staging Parquet to temp volume: %s", temp_volume_path)
@@ -575,7 +587,7 @@ FROM parquet.`{temp_volume_path}`"""
         finally:
             try:
                 Thread(
-                    target=temp_volume_path.
+                    target=temp_volume_path.remove,
                     kwargs={
                         "recursive": True
                     }

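The engine.py hunks above move the `SQLAISession`/`SQLFlavor` import under `TYPE_CHECKING`, quote the `_ai_session` annotation, and re-import the names inside `ai_session()` at call time. A minimal standalone sketch of that deferred-import pattern follows; it uses the standard-library `decimal` module as a stand-in for the heavy or circular dependency, so it is an illustration of the technique rather than the Yggdrasil code itself.

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Seen only by type checkers, so the dependency is not imported at module load.
    from decimal import Decimal


class PriceBook:
    # Quoted annotation: the name does not need to exist at runtime.
    _total: Optional["Decimal"] = None

    def total(self) -> "Decimal":
        # Deferred import: paid only when the method is actually used,
        # which also breaks import cycles between modules.
        from decimal import Decimal

        if self._total is None:
            self._total = Decimal(0)
        return self._total


print(PriceBook().total())  # Decimal('0')
```
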
yggdrasil/databricks/sql/warehouse.py
CHANGED
@@ -256,6 +256,7 @@ class SQLWarehouse(WorkspaceService):
         elif self.warehouse_id:
             return self
 
+        starter_warehouse, starter_name = None, "Serverless Starter Warehouse"
         warehouse_name = warehouse_name or self.warehouse_name or self._make_default_name(enable_serverless_compute=True)
 
         if warehouse_name:
@@ -284,8 +285,15 @@
                     warehouse_name=warehouse_name,
                     warehouse_id=warehouse.warehouse_id
                 )
+
                 return warehouse
 
+            elif warehouse.warehouse_name == starter_name:
+                starter_warehouse = warehouse
+
+        if starter_warehouse is not None:
+            return starter_warehouse
+
         if raise_error:
             v = warehouse_name or warehouse_id
 
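The warehouse.py change keeps scanning for a warehouse that matches the requested name but now also remembers the "Serverless Starter Warehouse" it passes along the way, returning it as a fallback when nothing matches. A rough sketch of that selection logic with plain dictionaries (the field names and inventory below are illustrative, not the Databricks SDK objects):

```python
from typing import List, Optional

STARTER_NAME = "Serverless Starter Warehouse"


def pick_warehouse(warehouses: List[dict], wanted_name: Optional[str]) -> Optional[dict]:
    """Return the warehouse matching wanted_name, else the starter warehouse, else None."""
    starter = None
    for wh in warehouses:
        if wanted_name and wh["name"] == wanted_name:
            return wh                  # exact match wins immediately
        if wh["name"] == STARTER_NAME:
            starter = wh               # remember the fallback candidate
    return starter                     # may still be None


inventory = [
    {"name": "Serverless Starter Warehouse", "id": "w1"},
    {"name": "analytics-xl", "id": "w2"},
]
print(pick_warehouse(inventory, "reporting"))  # falls back to the starter warehouse
```
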
yggdrasil/databricks/workspaces/io.py
CHANGED
@@ -3,28 +3,25 @@
 import base64
 import io
 import logging
+import os
 import time
 from abc import ABC, abstractmethod
+from tempfile import SpooledTemporaryFile
 from threading import Thread
-from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any
+from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any, BinaryIO
 
 import dill
 import pyarrow as pa
 import pyarrow.csv as pcsv
 import pyarrow.parquet as pq
-from pyarrow.dataset import (
-    FileFormat,
-    ParquetFileFormat,
-    CsvFileFormat,
-)
 
 from .path_kind import DatabricksPathKind
 from ...libs.databrickslib import databricks
 from ...libs.pandaslib import PandasDataFrame
 from ...libs.polarslib import polars, PolarsDataFrame
-from ...pyutils import retry
+from ...pyutils.retry import retry
 from ...types.cast.registry import convert
-from ...types.file_format import ExcelFileFormat
+from ...types.file_format import FileFormat, ParquetFileFormat, CsvFileFormat, ExcelFileFormat
 
 if databricks is not None:
     from databricks.sdk.service.workspace import ImportFormat, ExportFormat
@@ -45,7 +42,64 @@ __all__ = [
 
 
 LOGGER = logging.getLogger(__name__)
+_SPOOL_MAX = 64 * 1024 * 1024  # 64MB in RAM then spill to disk
+_COPY_CHUNK = 8 * 1024 * 1024  # 8MB chunks
+
+def _prepare_binaryio_and_size(
+    data: Union[bytes, bytearray, memoryview, BinaryIO]
+) -> tuple[int, BinaryIO, bool]:
+    """
+    Returns (size, bio, should_close).
+
+    - bytes-like -> wrap in BytesIO (closeable by us).
+    - seekable file -> compute size via fstat or seek/tell.
+    - non-seekable stream -> spool into SpooledTemporaryFile, count bytes.
+    """
+    # bytes-like
+    if isinstance(data, (bytes, bytearray, memoryview)):
+        b = bytes(data)
+        return len(b), io.BytesIO(b), True
+
+    f: BinaryIO = data
+
+    # 1) try OS-level size for real files
+    try:
+        fileno = f.fileno()  # type: ignore[attr-defined]
+    except Exception:
+        fileno = None
+
+    if fileno is not None:
+        try:
+            st = os.fstat(fileno)
+            # rewind if possible
+            try:
+                f.seek(0)
+            except Exception:
+                pass
+            return int(st.st_size), f, False
+        except Exception:
+            pass
+
+    # 2) try seek/tell (seekable streams)
+    try:
+        f.seek(0, io.SEEK_END)
+        end = f.tell()
+        f.seek(0)
+        return int(end), f, False
+    except Exception:
+        pass
 
+    # 3) non-seekable stream: spool + count
+    spooled = SpooledTemporaryFile(max_size=_SPOOL_MAX, mode="w+b")
+    size = 0
+    while True:
+        chunk = f.read(_COPY_CHUNK)
+        if not chunk:
+            break
+        spooled.write(chunk)
+        size += len(chunk)
+    spooled.seek(0)
+    return size, spooled, True
 
 class DatabricksIO(ABC, IO):
     """File-like interface for Databricks workspace, volume, or DBFS paths."""
@@ -102,7 +156,10 @@ class DatabricksIO(ABC, IO):
         return self.path.__hash__()
 
     def __str__(self):
-        return 
+        return "%s(path=%s)" % (
+            self.__class__.__name__,
+            self.path.__repr__()
+        )
 
     def __repr__(self):
         return "%s(path=%s)" % (
@@ -1081,9 +1138,9 @@ class DatabricksVolumeIO(DatabricksIO):
 
         try:
             resp = client.download(full_path)
-        except 
+        except (NotFound, ResourceDoesNotExist, BadRequest, InternalError) as e:
             # Databricks SDK exceptions vary a bit by version; keep it pragmatic.
-            if allow_not_found
+            if allow_not_found:
                 return b""
             raise
@@ -1096,53 +1153,61 @@
         end = start + length
         return data[start:end]
 
-
-
-
-
-
-
-
-
-
-    """
+    def write_all_bytes(
+        self,
+        data: Union[bytes, bytearray, memoryview, BinaryIO],
+        *,
+        overwrite: bool = True,
+        part_size: Optional[int] = None,
+        use_parallel: bool = True,
+        parallelism: Optional[int] = None,
+    ):
+        """Write bytes/stream to a volume file safely (BinaryIO upload)."""
         sdk = self.workspace.sdk()
         client = sdk.files
         full_path = self.path.files_full_path()
 
-        LOGGER.debug(
-            "Writing all bytes in %s",
-            self
-        )
+        LOGGER.debug("Writing all bytes in %s", self)
 
-
-        client.upload(
-            full_path,
-            io.BytesIO(data),
-            overwrite=True
-        )
-        except (NotFound, ResourceDoesNotExist, BadRequest):
-            self.path.parent.mkdir(parents=True, exist_ok=True)
+        size, bio, should_close = _prepare_binaryio_and_size(data)
 
-
+        def _upload():
+            return client.upload(
                 full_path,
-
-                overwrite=
+                bio,
+                overwrite=overwrite,
+                part_size=part_size,
+                use_parallel=use_parallel,
+                parallelism=parallelism,
             )
 
-
-
-
-
+        try:
+            _ = _upload()
+        except (NotFound, ResourceDoesNotExist, BadRequest, InternalError):
+            self.path.parent.mkdir(parents=True, exist_ok=True)
+            # Important: rewind if possible before retry
+            try:
+                bio.seek(0)
+            except Exception:
+                pass
+            _ = _upload()
+        finally:
+            if should_close:
+                try:
+                    bio.close()
+                except Exception:
+                    pass
 
         self.path.reset_metadata(
             is_file=True,
            is_dir=False,
-            size=
-            mtime=time.time()
+            size=size,
+            mtime=time.time(),
         )
 
-
+        LOGGER.info("Written %s bytes in %s", size or "all", self.path)
+
+        return self  # or return result if your API prefers that
 
 
 class DatabricksDBFSIO(DatabricksIO):
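The new `_prepare_binaryio_and_size` helper above normalizes bytes, seekable files, and non-seekable streams into a `(size, stream, should_close)` triple before uploading. The standalone sketch below exercises the same three size-detection branches with standard-library objects only; it illustrates the technique and does not call into the Yggdrasil helper.

```python
import io
import os
import tempfile
from tempfile import SpooledTemporaryFile


def size_of(data) -> int:
    """Mirror the three branches: bytes-like, file with a descriptor, seekable or spooled stream."""
    if isinstance(data, (bytes, bytearray, memoryview)):
        return len(bytes(data))
    try:
        return os.fstat(data.fileno()).st_size              # real file: ask the OS
    except (OSError, AttributeError, io.UnsupportedOperation):
        pass
    if data.seekable():
        pos = data.tell()
        size = data.seek(0, io.SEEK_END)                    # seekable stream: seek/tell
        data.seek(pos)
        return size
    spooled = SpooledTemporaryFile(max_size=64 * 1024 * 1024, mode="w+b")
    size = 0
    while chunk := data.read(8 * 1024 * 1024):               # non-seekable: spool and count
        spooled.write(chunk)
        size += len(chunk)
    spooled.seek(0)                                           # real code would return the spooled copy too
    return size


print(size_of(b"hello"))                                      # 5
with tempfile.TemporaryFile() as fh:
    fh.write(b"abcdef")
    fh.flush()
    print(size_of(fh))                                        # 6, via os.fstat
print(size_of(io.BytesIO(b"xyz")))                            # 3, via seek/tell (no fileno)
```
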
yggdrasil/databricks/workspaces/path.py
CHANGED
@@ -1,8 +1,6 @@
 """Databricks path abstraction spanning DBFS, workspace, and volumes."""
 
 # src/yggdrasil/databricks/workspaces/databricks_path.py
-from __future__ import annotations
-
 import dataclasses
 import datetime as dt
 import io
@@ -15,9 +13,7 @@ from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Any, IO
 
 import dill
 import pyarrow as pa
-import pyarrow.dataset as ds
 from pyarrow import ArrowInvalid
-from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
 from pyarrow.fs import FileInfo, FileType, FileSystem
 
 from .io import DatabricksIO
@@ -25,12 +21,9 @@ from .path_kind import DatabricksPathKind
 from .volumes_path import get_volume_status, get_volume_metadata
 from ...libs.databrickslib import databricks
 from ...libs.pandaslib import PandasDataFrame
-from ...libs.polarslib import polars
-from ...types.cast.
-from ...types.
-from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
-from ...types.cast.registry import convert, register_converter
-from ...types.file_format import ExcelFileFormat
+from ...libs.polarslib import polars
+from ...types.cast.registry import convert
+from ...types.file_format import FileFormat, ExcelFileFormat, ParquetFileFormat, JsonFileFormat, CsvFileFormat
 
 if databricks is not None:
     from databricks.sdk.errors import InternalError
@@ -1305,6 +1298,8 @@ class DatabricksPath:
         Returns:
             A PyArrow Dataset instance.
         """
+        import pyarrow.dataset as ds
+
         filesystem = self.filesystem(workspace=workspace) if filesystem is None else filesystem
 
         return ds.dataset(
@@ -1684,32 +1679,3 @@
             raise ValueError(
                 "Invalid engine %s, must be in duckdb, polars" % engine
             )
-
-if databricks is not None:
-    @register_converter(DatabricksPath, ds.Dataset)
-    def databricks_path_to_arrow_table(
-        data: DatabricksPath,
-        options: Optional[CastOptions] = None,
-    ) -> ds.Dataset:
-        return data.arrow_dataset()
-
-
-    @pandas_converter(DatabricksPath, PandasDataFrame)
-    def databricks_path_to_pandas(
-        data: DatabricksPath,
-        options: Optional[CastOptions] = None,
-    ) -> PolarsDataFrame:
-        return cast_pandas_dataframe(
-            data.read_pandas(),
-            options
-        )
-
-    @polars_converter(DatabricksPath, PolarsDataFrame)
-    def databricks_path_to_polars(
-        data: DatabricksPath,
-        options: Optional[CastOptions] = None,
-    ) -> PolarsDataFrame:
-        return cast_polars_dataframe(
-            data.read_polars(),
-            options
-        )

yggdrasil/databricks/workspaces/workspace.py
CHANGED
@@ -520,9 +520,9 @@ class Workspace:
         Returns:
             A DatabricksPath pointing at the shared cache location.
         """
-        start = int(time.time()
-        max_lifetime = max_lifetime or 48
-        end = int(start + max_lifetime)
+        start = int(time.time())
+        max_lifetime = int(max_lifetime or 48 * 3600)
+        end = max(0, int(start + max_lifetime))
 
         base_path = base_path or self._base_tmp_path(
             catalog_name=catalog_name,
@@ -575,19 +575,15 @@
             base_path
         )
 
-
-
+        for path in base_path.ls(recursive=False, allow_not_found=True):
+            if path.name.startswith("tmp"):
                 parts = path.name.split("-")
 
                 if len(parts) > 2 and parts[0] == "tmp" and parts[1].isdigit() and parts[2].isdigit():
-                    end = int(parts[2])
+                    end = int(parts[2])
 
                     if end and time.time() > end:
                         path.remove(recursive=True)
-        except Exception as e:
-            if raise_error:
-                raise e
-            LOGGER.warning(e)
 
         LOGGER.info(
             "Cleaned temp path %s",
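The workspace.py hunks fix the lifetime arithmetic (seconds, with a 48-hour default) and walk temp entries whose names encode start and end timestamps as `tmp-<start>-<end>`, removing the expired ones. A small sketch of that naming and expiry convention, reduced to pure string and time logic with no Databricks calls (function names here are illustrative):

```python
import time
from typing import Optional


def make_tmp_name(max_lifetime: Optional[float] = None) -> str:
    start = int(time.time())
    lifetime = int(max_lifetime or 48 * 3600)   # default lifetime: 48 hours, in seconds
    end = max(0, start + lifetime)
    return f"tmp-{start}-{end}"


def is_expired(name: str, now: Optional[float] = None) -> bool:
    parts = name.split("-")
    if len(parts) > 2 and parts[0] == "tmp" and parts[1].isdigit() and parts[2].isdigit():
        end = int(parts[2])
        return bool(end) and (now if now is not None else time.time()) > end
    return False


name = make_tmp_name(max_lifetime=3600)
print(name, is_expired(name))                 # fresh entry, not expired
print(is_expired("tmp-1000-2000", now=5000))  # past its end timestamp: True
```
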
yggdrasil/libs/databrickslib.py
CHANGED
@@ -1,5 +1,5 @@
 """Optional Databricks SDK dependency helpers."""
-from 
+from ..types.dummy_class import DummyModuleClass
 
 
 class DatabricksDummyClass(DummyModuleClass):
@@ -25,7 +25,7 @@ def require_databricks_sdk():
 
 try:
     import databricks
-    import databricks.sdk
+    import databricks.sdk
 
     from databricks.sdk import WorkspaceClient
 
@@ -34,7 +34,6 @@ try:
 except ImportError:
     databricks = DatabricksDummyClass
     databricks_sdk = DatabricksDummyClass
-
     WorkspaceClient = DatabricksDummyClass
 
 
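databrickslib.py keeps the package's optional-dependency pattern: try to import the real SDK and, on ImportError, bind the same names to a dummy placeholder so the rest of the package imports unconditionally. A generic sketch of that pattern; the `MissingDependency` behaviour shown here is a guess at the idea behind `DummyModuleClass`, not the actual Yggdrasil class.

```python
class MissingDependency:
    """Placeholder that fails loudly only when the optional dependency is actually used."""

    _module = "databricks-sdk"

    def __init__(self, *args, **kwargs):
        raise ImportError(
            f"{self._module} is required for this feature; install it with "
            f"`pip install {self._module}`."
        )


try:
    from databricks.sdk import WorkspaceClient  # real client when the SDK is installed
except ImportError:
    WorkspaceClient = MissingDependency          # same name, deferred failure


def connect(host: str):
    # Importing this module always succeeds; the error surfaces only on construction.
    return WorkspaceClient(host=host)
```
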
yggdrasil/pyutils/modules.py
CHANGED
@@ -42,7 +42,7 @@ MODULE_PROJECT_NAMES_ALIASES = {
     "yggdrasil": "ygg",
     "jwt": "PyJWT",
 }
-
+DEFAULT_PIP_INDEX_SETTINGS = None
 
 def module_name_to_project_name(module_name: str) -> str:
     """Map module import names to PyPI project names when they differ.
@@ -264,6 +264,11 @@ class PipIndexSettings:
         Returns:
             Default PipIndexSettings instance.
         """
+        global DEFAULT_PIP_INDEX_SETTINGS
+
+        if DEFAULT_PIP_INDEX_SETTINGS is None:
+            DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
+
         return DEFAULT_PIP_INDEX_SETTINGS
 
     @property
@@ -363,9 +368,3 @@ def get_pip_index_settings() -> PipIndexSettings:
         extra_index_urls.append(u)
 
     return PipIndexSettings(index_url=index_url, extra_index_urls=extra_index_urls, sources=sources)
-
-
-try:
-    DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
-except:
-    DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()

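modules.py replaces the import-time `DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()` (previously wrapped in a bare try/except) with a `None` sentinel populated on first access. A minimal sketch of that lazy-singleton move, with a stand-in `load_settings` in place of the real pip-index probe:

```python
from dataclasses import dataclass, field
from typing import List, Optional

DEFAULT_SETTINGS = None   # sentinel; nothing expensive happens at import time


@dataclass
class Settings:
    index_url: Optional[str] = None
    extra_index_urls: List[str] = field(default_factory=list)


def load_settings() -> Settings:
    # Stand-in for the expensive environment / pip-config probe.
    return Settings(index_url="https://pypi.org/simple")


def default_settings() -> Settings:
    global DEFAULT_SETTINGS
    if DEFAULT_SETTINGS is None:          # first call pays the cost, later calls reuse it
        DEFAULT_SETTINGS = load_settings()
    return DEFAULT_SETTINGS


print(default_settings() is default_settings())   # True: one shared instance
```
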
yggdrasil/pyutils/python_env.py
CHANGED
@@ -27,7 +27,6 @@ log = logging.getLogger(__name__)
 
 class PythonEnvError(RuntimeError):
     """Raised when Python environment operations fail."""
-
     pass
 
 
@@ -72,6 +71,9 @@ _NON_PIPABLE_RE = re.compile(
     re.IGNORECASE,
 )
 
+# Snapshot singleton (import-time)
+CURRENT_PYTHON_ENV: "PythonEnv" = None
+
 
 
 def _filter_non_pipable_linux_packages(requirements: Iterable[str]) -> List[str]:
@@ -1508,11 +1510,3 @@
         log.error("python_env CLI error: %s", e)
         print(f"ERROR: {e}", file=sys.stderr)
         return 2
-
-
-# Snapshot singleton (import-time)
-CURRENT_PYTHON_ENV: PythonEnv = None
-
-
-if __name__ == "__main__":
-    raise SystemExit(PythonEnv.cli())

yggdrasil/requests/msal.py
CHANGED
@@ -3,12 +3,8 @@
 # auth_session.py
 import os
 import time
-from 
-
-import urllib3
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+from typing import Any, Optional
 
 from .session import YGGSession
 
@@ -38,11 +34,11 @@ class MSALAuth:
         authority: Optional authority URL override.
         scopes: List of scopes to request.
     """
-    tenant_id: Optional[str] = 
-    client_id: Optional[str] = 
-    client_secret: Optional[str] = 
-    authority: Optional[str] = 
-    scopes: list[str] | None = 
+    tenant_id: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_TENANT_ID"))
+    client_id: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_CLIENT_ID"))
+    client_secret: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_CLIENT_SECRET"))
+    authority: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_AUTHORITY"))
+    scopes: list[str] | None = field(default_factory=lambda: os.environ.get("AZURE_SCOPES"))
 
     _auth_app: ConfidentialClientApplication | None = None
     _expires_at: float | None = None
@@ -77,97 +73,15 @@
         Returns:
             None.
         """
-        self.tenant_id = self.tenant_id or os.environ.get("AZURE_TENANT_ID")
-        self.client_id = self.client_id or os.environ.get("AZURE_CLIENT_ID")
-        self.client_secret = self.client_secret or os.environ.get("AZURE_CLIENT_SECRET")
-
-        self.authority = self.authority or os.environ.get("AZURE_AUTHORITY")
         if not self.authority:
+            assert self.tenant_id, "tenant_id is required to build authority URL"
+
             self.authority = f"https://login.microsoftonline.com/{self.tenant_id}"
 
-        self.scopes = self.scopes or os.environ.get("AZURE_SCOPES")
         if self.scopes:
             if isinstance(self.scopes, str):
                 self.scopes = self.scopes.split(",")
 
-        self._validate_config()
-
-    def _validate_config(self):
-        """Validate that all required configuration is present.
-
-        Returns:
-            None.
-        """
-        missing = []
-
-        if not self.client_id:
-            missing.append("azure_client_id (AZURE_CLIENT_ID)")
-        if not self.client_secret:
-            missing.append("azure_client_secret (AZURE_CLIENT_SECRET)")
-        if not self.tenant_id:
-            missing.append("azure_client_secret (AZURE_TENANT_ID)")
-        if not self.scopes:
-            missing.append("scopes (AZURE_SCOPES)")
-
-        if missing:
-            raise ValueError(f"Missing required configuration: {', '.join(missing)}")
-
-    @classmethod
-    def find_in_env(
-        cls,
-        env: Mapping = None,
-        prefix: Optional[str] = None
-    ) -> "MSALAuth":
-        """Return an MSALAuth built from environment variables if available.
-
-        Args:
-            env: Mapping to read variables from; defaults to os.environ.
-            prefix: Optional prefix for variable names.
-
-        Returns:
-            A configured MSALAuth instance or None.
-        """
-        if not env:
-            env = os.environ
-        prefix = prefix or "AZURE_"
-
-        required = {
-            key: env.get(prefix + key.upper())
-            for key in (
-                "client_id", "client_secret", "tenant_id", "scopes"
-            )
-        }
-
-        if all(required.values()):
-            scopes = required["scopes"].split(",") if required["scopes"] else None
-            return MSALAuth(
-                tenant_id=required["tenant_id"],
-                client_id=required["client_id"],
-                client_secret=required["client_secret"],
-                scopes=scopes,
-                authority=env.get(prefix + "AUTHORITY"),
-            )
-
-        return None
-
-    def export_to(self, to: dict = os.environ):
-        """Export the auth configuration to the provided mapping.
-
-        Args:
-            to: Mapping to populate with auth configuration values.
-
-        Returns:
-            None.
-        """
-        for key, value in (
-            ("AZURE_CLIENT_ID", self.client_id),
-            ("AZURE_CLIENT_SECRET", self.client_secret),
-            ("AZURE_AUTHORITY", self.authority),
-            ("AZURE_SCOPES", ",".join(self.scopes)),
-        ):
-            if value:
-                to[key] = value
-
     @property
     def auth_app(self) -> ConfidentialClientApplication:
         """Return or initialize the MSAL confidential client.
@@ -298,7 +212,6 @@ class MSALSession(YGGSession):
         super().__init__(*args, **kwargs)
         self.msal_auth = msal_auth
 
-
     def prepare_request(self, request):
         """Prepare the request with an Authorization header when needed.
 
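msal.py replaces the `self.x = self.x or os.environ.get(...)` assignments (and the removed `_validate_config`, `find_in_env`, and `export_to` helpers) with `field(default_factory=...)` defaults, so each field reads its environment variable when an instance is created rather than through post-init mutation. A short sketch of the difference, using hypothetical `EXAMPLE_*` variable names rather than the real `AZURE_*` ones:

```python
import os
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Auth:
    # Read when an instance is created, not when the module is imported,
    # and still overridable by passing an explicit value.
    tenant_id: Optional[str] = field(default_factory=lambda: os.environ.get("EXAMPLE_TENANT_ID"))
    client_id: Optional[str] = field(default_factory=lambda: os.environ.get("EXAMPLE_CLIENT_ID"))


os.environ["EXAMPLE_TENANT_ID"] = "t-123"
print(Auth())                            # tenant_id picked up from the environment
print(Auth(tenant_id="explicit-wins"))   # explicit argument overrides the factory
```
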
yggdrasil/types/file_format.py
CHANGED
@@ -1,8 +1,12 @@
-from pyarrow.dataset import FileFormat
+from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
 
 
 __all__ = [
-    "
+    "FileFormat",
+    "ExcelFileFormat",
+    "ParquetFileFormat",
+    "CsvFileFormat",
+    "JsonFileFormat"
 ]
 

yggdrasil/types/python_defaults.py
CHANGED
@@ -18,84 +18,96 @@ __all__ = [
     "default_arrow_array"
 ]
 
+DEFAULT_MAPS_INITIALIZED = False
 
 _NONE_TYPE = type(None)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+_ARROW_DEFAULTS = {}
+_POLARS_DEFAULTS = {}
+_PRIMITIVE_DEFAULTS = {}
+_SPECIAL_DEFAULTS = {}
+
+
+def ensure_default_maps_initialized():
+    global DEFAULT_MAPS_INITIALIZED
+    global _PRIMITIVE_DEFAULTS
+    global _SPECIAL_DEFAULTS
+    global _ARROW_DEFAULTS
+    global _POLARS_DEFAULTS
+
+    if not DEFAULT_MAPS_INITIALIZED:
+        _PRIMITIVE_DEFAULTS = {
+            str: "",
+            int: 0,
+            float: 0.0,
+            bool: False,
+            bytes: b"",
+        }
+
+        _SPECIAL_DEFAULTS = {
+            datetime.datetime: lambda: datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
+            datetime.date: lambda: datetime.date(1970, 1, 1),
+            datetime.time: lambda: datetime.time(0, 0, 0, tzinfo=datetime.timezone.utc),
+            datetime.timedelta: lambda: datetime.timedelta(0),
+            uuid.UUID: lambda: uuid.UUID(int=0),
+            decimal.Decimal: lambda: decimal.Decimal(0),
+        }
+
+        _ARROW_DEFAULTS = {
+            pa.null(): pa.scalar(None, type=pa.null()),
+
+            pa.bool_(): pa.scalar(False, type=pa.bool_()),
+
+            pa.int8(): pa.scalar(0, type=pa.int8()),
+            pa.int16(): pa.scalar(0, type=pa.int16()),
+            pa.int32(): pa.scalar(0, type=pa.int32()),
+            pa.int64(): pa.scalar(0, type=pa.int64()),
+
+            pa.uint8(): pa.scalar(0, type=pa.uint8()),
+            pa.uint16(): pa.scalar(0, type=pa.uint16()),
+            pa.uint32(): pa.scalar(0, type=pa.uint32()),
+            pa.uint64(): pa.scalar(0, type=pa.uint64()),
+
+            # pa.float16(): pa.scalar(0.0, type=pa.float16()),
+            pa.float32(): pa.scalar(0.0, type=pa.float32()),
+            pa.float64(): pa.scalar(0.0, type=pa.float64()),
+
+            pa.string(): pa.scalar("", type=pa.string()),
+            pa.string_view(): pa.scalar("", type=pa.string_view()),
+            pa.large_string(): pa.scalar("", type=pa.large_string()),
+
+            pa.binary(): pa.scalar(b"", type=pa.binary()),
+            pa.binary_view(): pa.scalar(b"", type=pa.binary_view()),
+            pa.large_binary(): pa.scalar(b"", type=pa.large_binary()),
+        }
+
+        try:
+            import polars
+
+            _POLARS_DEFAULTS = {
+                polars.Null(): None,
+                polars.Boolean(): False,
+
+                polars.Binary(): b"",
+
+                polars.Utf8(): "",
+
+                polars.Int8(): 0,
+                polars.Int16(): 0,
+                polars.Int32(): 0,
+                polars.Int64(): 0,
+
+                polars.UInt8(): 0,
+                polars.UInt16(): 0,
+                polars.UInt32(): 0,
+                polars.UInt64(): 0,
+
+                polars.Float32(): 0.0,
+                polars.Float64(): 0.0,
+            }
+        except ImportError:
+            pass
+
+        DEFAULT_MAPS_INITIALIZED = True
 
 def _is_optional(hint) -> bool:
     """Return True when the type hint is Optional.
@@ -199,6 +211,8 @@ def default_arrow_scalar(
     Returns:
         Arrow scalar default.
     """
+    ensure_default_maps_initialized()
+
     if nullable:
         return pa.scalar(None, type=dtype)
 
@@ -307,6 +321,8 @@ def default_python_scalar(hint: Any):
     if _is_optional(hint):
         return None
 
+    ensure_default_maps_initialized()
+
     if hint in _PRIMITIVE_DEFAULTS:
        return _PRIMITIVE_DEFAULTS[hint]
 
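python_defaults.py moves the Arrow, Polars, and primitive default tables behind `ensure_default_maps_initialized()`, so the fairly large `pa.scalar` table is only built on first use and the Polars portion degrades gracefully when Polars is absent. A reduced sketch of that guarded initialization and the lookup it serves; the table below is a small subset chosen for illustration, not the full Yggdrasil mapping.

```python
import pyarrow as pa

_INITIALIZED = False
_ARROW_DEFAULTS = {}


def ensure_initialized() -> None:
    global _INITIALIZED, _ARROW_DEFAULTS
    if _INITIALIZED:
        return
    # Built once, on first demand, rather than at import time.
    _ARROW_DEFAULTS = {
        pa.bool_(): pa.scalar(False, type=pa.bool_()),
        pa.int64(): pa.scalar(0, type=pa.int64()),
        pa.float64(): pa.scalar(0.0, type=pa.float64()),
        pa.string(): pa.scalar("", type=pa.string()),
        pa.binary(): pa.scalar(b"", type=pa.binary()),
    }
    _INITIALIZED = True


def default_scalar(dtype: pa.DataType, nullable: bool = False) -> pa.Scalar:
    if nullable:
        return pa.scalar(None, type=dtype)   # nullable fields default to null
    ensure_initialized()
    return _ARROW_DEFAULTS[dtype]


print(default_scalar(pa.int64()))            # 0 as an Int64 scalar
print(default_scalar(pa.string(), True))     # null string scalar
```
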
yggdrasil/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.
+__version__ = "0.1.65"

{ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/WHEEL
File without changes
{ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/entry_points.txt
File without changes
{ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/licenses/LICENSE
File without changes
{ygg-0.1.60.dist-info → ygg-0.1.65.dist-info}/top_level.txt
File without changes