ygg 0.1.60__py3-none-any.whl → 0.1.65__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ygg
- Version: 0.1.60
+ Version: 0.1.65
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
  Author: Yggdrasil contributors
  License: Apache License
@@ -207,7 +207,7 @@ License: Apache License

  Project-URL: Homepage, https://github.com/Platob/Yggdrasil
  Project-URL: Repository, https://github.com/Platob/Yggdrasil
- Project-URL: Documentation, https://github.com/Platob/Yggdrasil/tree/main/python/docs
+ Project-URL: Documentation, https://github.com/Platob/Yggdrasil
  Keywords: arrow,polars,pandas,spark,databricks,typing,dataclass,serialization
  Classifier: Development Status :: 3 - Alpha
  Classifier: Programming Language :: Python
@@ -1,9 +1,9 @@
- ygg-0.1.60.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ ygg-0.1.65.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
  yggdrasil/__init__.py,sha256=4-ghPak2S6zfMqmnlxW2GCgPb5s79znpKa2hGEGXcE4,24
  yggdrasil/exceptions.py,sha256=NEpbDFn-8ZRsLiEgJicCwrTHNMWAGtdrTJzosfAeVJo,82
- yggdrasil/version.py,sha256=RzpPAn4AEKR5U8Ey0m3Oy_SvSsWT9yeRqhNeTwbK0ks,22
+ yggdrasil/version.py,sha256=P0GENqTQLndQpX5Tkuaob2sv-oNWWzdsMw2PdmqDlFY,22
  yggdrasil/ai/__init__.py,sha256=YEOVsyuvEOvPaZT8XN9xNysS_WOpHTbKgXgnA8up7x0,52
- yggdrasil/ai/session.py,sha256=X4btr4OTPLzk1rZx0pZLMJ6Gni1DfEMghAmx9qI1qdE,2579
+ yggdrasil/ai/session.py,sha256=10ATAnw8FOCpfIg9sNR4meki_MRckUzKZ9Uft4IXwLA,2515
  yggdrasil/ai/sql_session.py,sha256=n92tQjHUBIey6c3EJProiEEwfAtQm07Dtmei4WXzeG0,10812
  yggdrasil/databricks/__init__.py,sha256=0GRBP930ManOvyo-Y5E7bz7F2msnvU677OH6rxzPwd8,87
  yggdrasil/databricks/exceptions.py,sha256=-ZULt0wD5_Rxww11nk4Z46DvS5j18RdKR5ISmbQfUQA,142
@@ -16,22 +16,22 @@ yggdrasil/databricks/compute/remote.py,sha256=sF99i7GXZcC0GiNgO9VO0I26rFbrtnDhK9
  yggdrasil/databricks/jobs/__init__.py,sha256=snxGSJb0M5I39v0y3IR-uEeSlZR248cQ_4DJ1sYs-h8,154
  yggdrasil/databricks/jobs/config.py,sha256=9LGeHD04hbfy0xt8_6oobC4moKJh4_DTjZiK4Q2Tqjk,11557
  yggdrasil/databricks/sql/__init__.py,sha256=PetgRp1jEj5K3TgN09FwNUVjVN8YYuGq0cDIOTqsbns,144
- yggdrasil/databricks/sql/engine.py,sha256=6PVxrO78UxDD6jHBnDjKV2KuZ9JpuvwVckQ_mjsaeKw,49558
+ yggdrasil/databricks/sql/engine.py,sha256=nFWeegs91CtjCLzxgZsJwOlAXNVI1v_lfecuFVfKFFY,49979
  yggdrasil/databricks/sql/exceptions.py,sha256=srMR3Y9LQm45rkyxfyCgpgcoGtRRvGKWBEoUHf4kxsg,1762
  yggdrasil/databricks/sql/statement_result.py,sha256=01DzFX1bGDIGHj0OW2ngfVVJ1w1KHlZEfAI934E35CU,15549
  yggdrasil/databricks/sql/types.py,sha256=5G-BM9_eOsRKEMzeDTWUsWW5g4Idvs-czVCpOCrMhdA,6412
- yggdrasil/databricks/sql/warehouse.py,sha256=W045PMLgZdt7f5w8aWIIX-vSLa5GE4V8yFffyrwZSOQ,18689
+ yggdrasil/databricks/sql/warehouse.py,sha256=bCMWAci_E7pxIH1-9qSgwzpLztLsyiBFjZgME9dOXC8,18971
  yggdrasil/databricks/workspaces/__init__.py,sha256=dv2zotoFVhNFlTCdRq6gwf5bEzeZkOZszoNZMs0k59g,114
  yggdrasil/databricks/workspaces/filesytem.py,sha256=Z8JXU7_XUEbw9fpTQT1avRQKi-IAP2KemXBMPkUoY4w,9805
- yggdrasil/databricks/workspaces/io.py,sha256=IHmOwX1cWksvfunwTr03BFPqhm8cWNEkCwMvM9vhM80,35162
- yggdrasil/databricks/workspaces/path.py,sha256=R6-RuMG7fZYBWS4wRmbw0bOIxiGYpwif47jFLWmcLGs,56950
+ yggdrasil/databricks/workspaces/io.py,sha256=RdgN5lmEYNF5phPRkRMCVHbUl-t3ZUGkKbzgYSTKpII,37420
+ yggdrasil/databricks/workspaces/path.py,sha256=k3UB0LhF4hQI-Iza50D5dVjhqNsAdP4KabKiWK7bTWM,55775
  yggdrasil/databricks/workspaces/path_kind.py,sha256=rhWe1ky7uPD0du0bZSv2S4fK4C5zWd7zAF3UeS2iiPU,283
  yggdrasil/databricks/workspaces/volumes_path.py,sha256=s8CA33cG3jpMVJy5MILLlkEBcFg_qInDCF2jozLj1Fg,2431
- yggdrasil/databricks/workspaces/workspace.py,sha256=GEUp3f15SP5lUDx0_Ujzv5QtjVQg00WJRydR4rNLdXs,30216
+ yggdrasil/databricks/workspaces/workspace.py,sha256=f8Ihv3fqo6YYno1yvkXnMpZhQQWwMkklhg-C9MSVyrE,30103
  yggdrasil/dataclasses/__init__.py,sha256=_RkhfF3KC1eSORby1dzvBXQ0-UGG3u6wyUQWX2jq1Pc,108
  yggdrasil/dataclasses/dataclass.py,sha256=LxrCjwvmBnb8yRI_N-c31RHHxB4XoJPixmKg9iBIuaI,1148
  yggdrasil/libs/__init__.py,sha256=zdC9OU0Xy36CLY9mg2drxN6S7isPR8aTLzJA6xVIeLE,91
- yggdrasil/libs/databrickslib.py,sha256=t_0b_3iCGFPjBrJaIOvNzSEn5pjZBTbY_fOcDHp6qx8,1135
+ yggdrasil/libs/databrickslib.py,sha256=Y99ARtrVKVBTH0qZ0njYr1Oa_757wtsVY4ywH07IdQ4,1109
  yggdrasil/libs/pandaslib.py,sha256=_U4sdFvLAFD16_65RG-RFmcx4c3fvVnALESFaAlT71M,887
  yggdrasil/libs/polarslib.py,sha256=WnnERtMTl__ZPidcZkoV7mb8-c680zcAnJgzAoD3ZE8,1437
  yggdrasil/libs/sparklib.py,sha256=FQ3W1iz2EIpQreorOiQuFt15rdhq2QhGEAWp8Zrbl9A,10177
@@ -44,19 +44,19 @@ yggdrasil/pyutils/equality.py,sha256=Xyf8D1dLUCm3spDEir8Zyj7O4US_fBJwEylJCfJ9slI
  yggdrasil/pyutils/exceptions.py,sha256=1c0xxFvGML5gkDPGzD_Tgw1ff9bGMVygH8ASgeoII2E,3889
  yggdrasil/pyutils/expiring_dict.py,sha256=pr2u25LGwPVbLfsLptiHGovUtYRRo0AMjaJtCtJl7nQ,8477
  yggdrasil/pyutils/mimetypes.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- yggdrasil/pyutils/modules.py,sha256=B7IP99YqUMW6-DIESFzBx8-09V1d0a8qrIJUDFhhL2g,11424
+ yggdrasil/pyutils/modules.py,sha256=a0YWunsuA-D-Ho41LrwkN6o7e88NMk7oOzrSzlG0kPQ,11488
  yggdrasil/pyutils/parallel.py,sha256=ubuq2m9dJzWYUyKCga4Y_9bpaeMYUrleYxdp49CHr44,6781
- yggdrasil/pyutils/python_env.py,sha256=OFvM0wxVzB1iHC4BGra0tLE3sZL1ZaIr5j9dtDeLaiU,51098
+ yggdrasil/pyutils/python_env.py,sha256=d9s6i_fXz6j4f5BnigKxR0dEUJ-4BmqjBD2u2ybX29U,51025
  yggdrasil/pyutils/retry.py,sha256=gXBtn1DdmIYIUmGKOUr8-SUT7MOu97LykN2YR4uocgc,11917
  yggdrasil/pyutils/waiting_config.py,sha256=WiMOiKyGR5iKr83YK4dljn7OCaDpxXMUx8cz-bUNGMg,6255
  yggdrasil/requests/__init__.py,sha256=dMesyzq97_DmI765x0TwaDPEfsxFtgGNgchk8LvEN-o,103
- yggdrasil/requests/msal.py,sha256=s2GCyzbgFdgdlJ1JqMrZ4qYVbmoG46-ZOTcaVQhZ-sQ,9220
+ yggdrasil/requests/msal.py,sha256=XSuKsxEIApfygiWOBBOok_trQk3eeNb5P0f3RAUrtss,6666
  yggdrasil/requests/session.py,sha256=SLnrgHY0Lby7ZxclRFUjHdfM8euN_8bSQEWl7TkJY2U,1461
  yggdrasil/types/__init__.py,sha256=CrLiDeYNM9fO975sE5ufeVKcy7Ca702IsaG2Pk8T3YU,139
  yggdrasil/types/dummy_class.py,sha256=XXM3_ljL4XfY5LeF-WTj-myqHaKAUmWZ23cPDrXAnBM,2327
- yggdrasil/types/file_format.py,sha256=yqAadZ5z6CrctsQO0ZmEY7eGXLbhBUnvvNOwkPSk0GU,133
+ yggdrasil/types/file_format.py,sha256=P-3JTa9FzhHj-ndWMGgsF0zxlR_V2q3a_p2R2CwSoRs,273
  yggdrasil/types/python_arrow.py,sha256=mOhyecAxa5u8JWsyTO26OMOWimHHgwLKWlkNSAyIVas,25636
- yggdrasil/types/python_defaults.py,sha256=GO3hZBZcwRHs9qiXes75y8l5X00kZHTfEC7el_x73uw,10184
+ yggdrasil/types/python_defaults.py,sha256=kT7vuNDxzP_5tsy0aOkzVh1sZN7rKR7mky9nrYiFkl0,11063
  yggdrasil/types/cast/__init__.py,sha256=Oft3pTs2bRM5hT7YqJAuOKTYYk-SACLaMOXUVdafy_I,311
  yggdrasil/types/cast/arrow_cast.py,sha256=IZstOcHjLKPy62TFGgjMSW3ttPGt3hMi6RmDw-92T0E,41623
  yggdrasil/types/cast/cast_options.py,sha256=nDaEvCCs7TBamhTWyDrYf3LVaBWzioIP2Q5_LXrChF4,15532
@@ -67,8 +67,8 @@ yggdrasil/types/cast/registry.py,sha256=OOqIfbIjPH-a3figvu-zTvEtUDTEWhe2xIl3cCA4
  yggdrasil/types/cast/spark_cast.py,sha256=_KAsl1DqmKMSfWxqhVE7gosjYdgiL1C5bDQv6eP3HtA,24926
  yggdrasil/types/cast/spark_pandas_cast.py,sha256=BuTiWrdCANZCdD_p2MAytqm74eq-rdRXd-LGojBRrfU,5023
  yggdrasil/types/cast/spark_polars_cast.py,sha256=btmZNHXn2NSt3fUuB4xg7coaE0RezIBdZD92H8NK0Jw,9073
- ygg-0.1.60.dist-info/METADATA,sha256=LBZYw5kRHouaxOl7x_dskRBUpH-XXDk-XaFrSLGKrg0,18528
- ygg-0.1.60.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- ygg-0.1.60.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
- ygg-0.1.60.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
- ygg-0.1.60.dist-info/RECORD,,
+ ygg-0.1.65.dist-info/METADATA,sha256=QQp-Hf_yN9HVD8Cjs6xgHzIaJ1g0GTJ7RODqgthqkh0,18506
+ ygg-0.1.65.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ ygg-0.1.65.dist-info/entry_points.txt,sha256=6q-vpWG3kvw2dhctQ0LALdatoeefkN855Ev02I1dKGY,70
+ ygg-0.1.65.dist-info/top_level.txt,sha256=iBe9Kk4VIVbLpgv_p8OZUIfxgj4dgJ5wBg6vO3rigso,10
+ ygg-0.1.65.dist-info/RECORD,,
yggdrasil/ai/session.py CHANGED
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
  from dataclasses import dataclass, field
  from typing import Dict, List, Optional

- from yggdrasil.types.dummy_class import DummyModuleClass
+ from ..types.dummy_class import DummyModuleClass

  try:
      from openai import OpenAI
@@ -19,8 +19,6 @@ __all__ = ["AISession"]
  class AISession(ABC):
      api_key: str
      base_url: str
-
-     # Gemini default (via OpenAI-compatible gateway)
      model: str = "gemini-2.5-flash"

      client: OpenAI = field(init=False)
yggdrasil/databricks/sql/engine.py CHANGED
@@ -17,7 +17,7 @@ import random
  import string
  import time
  from threading import Thread
- from typing import Optional, Union, Any, Dict, List, Literal
+ from typing import Optional, Union, Any, Dict, List, Literal, TYPE_CHECKING

  import pyarrow as pa
  import pyarrow.dataset as pds
@@ -26,11 +26,10 @@ from .statement_result import StatementResult
  from .types import column_info_to_arrow_field
  from .warehouse import SQLWarehouse
  from ..workspaces import WorkspaceService, DatabricksPath
- from ...ai.sql_session import SQLAISession, SQLFlavor
  from ...libs.databrickslib import databricks_sdk, DatabricksDummyClass
  from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
  from ...pyutils.waiting_config import WaitingConfigArg
- from ...types import is_arrow_type_string_like, is_arrow_type_binary_like
+ from ...types import is_arrow_type_string_like, is_arrow_type_binary_like, cast_arrow_tabular
  from ...types.cast.cast_options import CastOptions
  from ...types.cast.registry import convert
  from ...types.cast.spark_cast import cast_spark_dataframe
@@ -63,6 +62,10 @@ if pyspark is not None:
      import pyspark.sql.functions as F


+ if TYPE_CHECKING:
+     from ...ai.sql_session import SQLAISession, SQLFlavor
+
+
  __all__ = [
      "SQLEngine",
      "StatementResult"
@@ -101,7 +104,7 @@ class SQLEngine(WorkspaceService):
      schema_name: Optional[str] = None

      _warehouse: Optional[SQLWarehouse] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
-     _ai_session: Optional[SQLAISession] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
+     _ai_session: Optional["SQLAISession"] = dataclasses.field(default=None, repr=False, hash=False, compare=False)

      def table_full_name(
          self,
@@ -198,8 +201,13 @@ class SQLEngine(WorkspaceService):
      def ai_session(
          self,
          model: str = "databricks-gemini-2-5-pro",
-         flavor: SQLFlavor = SQLFlavor.DATABRICKS
+         flavor: Optional["SQLFlavor"] = None
      ):
+         from ...ai.sql_session import SQLAISession, SQLFlavor
+
+         if flavor is None:
+             flavor = SQLFlavor.DATABRICKS
+
          return SQLAISession(
              model=model,
              api_key=self.workspace.current_token(),
@@ -224,7 +232,7 @@ class SQLEngine(WorkspaceService):
          catalog_name: Optional[str] = None,
          schema_name: Optional[str] = None,
          wait: Optional[WaitingConfigArg] = True
-     ) -> "StatementResult":
+     ) -> StatementResult:
          """Execute a SQL statement via Spark or Databricks SQL Statement Execution API.

          Engine resolution:
@@ -504,10 +512,13 @@ class SQLEngine(WorkspaceService):
                  logger.exception("Failed to drop table %s after auto creation error", location)
                  raise

-         data_tbl = convert(
-             data, pa.Table,
-             options=cast_options, target_field=existing_schema
-         )
+         cast_options = CastOptions.check_arg(options=cast_options, target_field=existing_schema)
+
+         if isinstance(data, (pa.Table, pa.RecordBatch)):
+             data_tbl = cast_arrow_tabular(data, options=cast_options)
+         else:
+             data_tbl = convert(data, pa.Table, options=cast_options)
+

          num_rows = data_tbl.num_rows
          logger.debug(
@@ -524,7 +535,8 @@ class SQLEngine(WorkspaceService):
              catalog_name=catalog_name,
              schema_name=schema_name,
              volume_name="tmp",
-             extension="parquet"
+             extension="parquet",
+             max_lifetime=3600,
          ) if temp_volume_path is None else DatabricksPath.parse(obj=temp_volume_path, workspace=connected.workspace)

          logger.debug("Staging Parquet to temp volume: %s", temp_volume_path)
@@ -575,7 +587,7 @@ FROM parquet.`{temp_volume_path}`"""
          finally:
              try:
                  Thread(
-                     target=temp_volume_path.rmdir,
+                     target=temp_volume_path.remove,
                      kwargs={
                          "recursive": True
                      }
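Note on the engine.py hunks above: the SQLAISession/SQLFlavor import moves under TYPE_CHECKING at module scope and is re-imported locally inside ai_session(), so the AI module is only loaded when that feature is actually used. A minimal, self-contained sketch of the same deferred-import pattern, using decimal.Decimal purely as a stand-in for the optional dependency (not the library's actual classes):

    from typing import TYPE_CHECKING, Optional

    if TYPE_CHECKING:
        # evaluated by static type checkers only, never at runtime
        from decimal import Decimal


    class Engine:
        _cached: Optional["Decimal"] = None

        def session(self) -> "Decimal":
            # the real import is deferred to first use, as ai_session() does above
            from decimal import Decimal

            if self._cached is None:
                self._cached = Decimal(0)
            return self._cached


    print(Engine().session())  # 0

The string annotation Optional["Decimal"] keeps the attribute typed without forcing the import at class-definition time, which mirrors the quoted "SQLAISession" annotation in the diff.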
yggdrasil/databricks/sql/warehouse.py CHANGED
@@ -256,6 +256,7 @@ class SQLWarehouse(WorkspaceService):
          elif self.warehouse_id:
              return self

+         starter_warehouse, starter_name = None, "Serverless Starter Warehouse"
          warehouse_name = warehouse_name or self.warehouse_name or self._make_default_name(enable_serverless_compute=True)

          if warehouse_name:
@@ -284,8 +285,15 @@ class SQLWarehouse(WorkspaceService):
                      warehouse_name=warehouse_name,
                      warehouse_id=warehouse.warehouse_id
                  )
+
                  return warehouse

+             elif warehouse.warehouse_name == starter_name:
+                 starter_warehouse = warehouse
+
+         if starter_warehouse is not None:
+             return starter_warehouse
+
          if raise_error:
              v = warehouse_name or warehouse_id

yggdrasil/databricks/workspaces/io.py CHANGED
@@ -3,28 +3,25 @@
  import base64
  import io
  import logging
+ import os
  import time
  from abc import ABC, abstractmethod
+ from tempfile import SpooledTemporaryFile
  from threading import Thread
- from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any
+ from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any, BinaryIO

  import dill
  import pyarrow as pa
  import pyarrow.csv as pcsv
  import pyarrow.parquet as pq
- from pyarrow.dataset import (
-     FileFormat,
-     ParquetFileFormat,
-     CsvFileFormat,
- )

  from .path_kind import DatabricksPathKind
  from ...libs.databrickslib import databricks
  from ...libs.pandaslib import PandasDataFrame
  from ...libs.polarslib import polars, PolarsDataFrame
- from ...pyutils import retry
+ from ...pyutils.retry import retry
  from ...types.cast.registry import convert
- from ...types.file_format import ExcelFileFormat
+ from ...types.file_format import FileFormat, ParquetFileFormat, CsvFileFormat, ExcelFileFormat

  if databricks is not None:
      from databricks.sdk.service.workspace import ImportFormat, ExportFormat
@@ -45,7 +42,64 @@ __all__ = [


  LOGGER = logging.getLogger(__name__)
+ _SPOOL_MAX = 64 * 1024 * 1024  # 64MB in RAM then spill to disk
+ _COPY_CHUNK = 8 * 1024 * 1024  # 8MB chunks
+
+ def _prepare_binaryio_and_size(
+     data: Union[bytes, bytearray, memoryview, BinaryIO]
+ ) -> tuple[int, BinaryIO, bool]:
+     """
+     Returns (size, bio, should_close).
+
+     - bytes-like -> wrap in BytesIO (closeable by us).
+     - seekable file -> compute size via fstat or seek/tell.
+     - non-seekable stream -> spool into SpooledTemporaryFile, count bytes.
+     """
+     # bytes-like
+     if isinstance(data, (bytes, bytearray, memoryview)):
+         b = bytes(data)
+         return len(b), io.BytesIO(b), True
+
+     f: BinaryIO = data
+
+     # 1) try OS-level size for real files
+     try:
+         fileno = f.fileno()  # type: ignore[attr-defined]
+     except Exception:
+         fileno = None
+
+     if fileno is not None:
+         try:
+             st = os.fstat(fileno)
+             # rewind if possible
+             try:
+                 f.seek(0)
+             except Exception:
+                 pass
+             return int(st.st_size), f, False
+         except Exception:
+             pass
+
+     # 2) try seek/tell (seekable streams)
+     try:
+         f.seek(0, io.SEEK_END)
+         end = f.tell()
+         f.seek(0)
+         return int(end), f, False
+     except Exception:
+         pass

+     # 3) non-seekable stream: spool + count
+     spooled = SpooledTemporaryFile(max_size=_SPOOL_MAX, mode="w+b")
+     size = 0
+     while True:
+         chunk = f.read(_COPY_CHUNK)
+         if not chunk:
+             break
+         spooled.write(chunk)
+         size += len(chunk)
+     spooled.seek(0)
+     return size, spooled, True

  class DatabricksIO(ABC, IO):
      """File-like interface for Databricks workspace, volume, or DBFS paths."""
@@ -102,7 +156,10 @@ class DatabricksIO(ABC, IO):
          return self.path.__hash__()

      def __str__(self):
-         return self.path.__str__()
+         return "%s(path=%s)" % (
+             self.__class__.__name__,
+             self.path.__repr__()
+         )

      def __repr__(self):
          return "%s(path=%s)" % (
@@ -1081,9 +1138,9 @@ class DatabricksVolumeIO(DatabricksIO):

          try:
              resp = client.download(full_path)
-         except Exception as e:
+         except (NotFound, ResourceDoesNotExist, BadRequest, InternalError) as e:
              # Databricks SDK exceptions vary a bit by version; keep it pragmatic.
-             if allow_not_found and any(s in str(e).lower() for s in ("not found", "not exist", "404")):
+             if allow_not_found:
                  return b""
              raise

@@ -1096,53 +1153,61 @@ class DatabricksVolumeIO(DatabricksIO):
          end = start + length
          return data[start:end]

-     @retry(exceptions=(InternalError,))
-     def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
-         """Write bytes to a volume file.
-
-         Args:
-             data: Union[bytes, IO[bytes]] to write.
-
-         Returns:
-             The DatabricksVolumeIO instance.
-         """
+     def write_all_bytes(
+         self,
+         data: Union[bytes, bytearray, memoryview, BinaryIO],
+         *,
+         overwrite: bool = True,
+         part_size: Optional[int] = None,
+         use_parallel: bool = True,
+         parallelism: Optional[int] = None,
+     ):
+         """Write bytes/stream to a volume file safely (BinaryIO upload)."""
          sdk = self.workspace.sdk()
          client = sdk.files
          full_path = self.path.files_full_path()

-         LOGGER.debug(
-             "Writing all bytes in %s",
-             self
-         )
+         LOGGER.debug("Writing all bytes in %s", self)

-         try:
-             client.upload(
-                 full_path,
-                 io.BytesIO(data),
-                 overwrite=True
-             )
-         except (NotFound, ResourceDoesNotExist, BadRequest):
-             self.path.parent.mkdir(parents=True, exist_ok=True)
+         size, bio, should_close = _prepare_binaryio_and_size(data)

-             client.upload(
+         def _upload():
+             return client.upload(
                  full_path,
-                 io.BytesIO(data),
-                 overwrite=True
+                 bio,
+                 overwrite=overwrite,
+                 part_size=part_size,
+                 use_parallel=use_parallel,
+                 parallelism=parallelism,
              )

-         LOGGER.info(
-             "Written all bytes in %s",
-             self
-         )
+         try:
+             _ = _upload()
+         except (NotFound, ResourceDoesNotExist, BadRequest, InternalError):
+             self.path.parent.mkdir(parents=True, exist_ok=True)
+             # Important: rewind if possible before retry
+             try:
+                 bio.seek(0)
+             except Exception:
+                 pass
+             _ = _upload()
+         finally:
+             if should_close:
+                 try:
+                     bio.close()
+                 except Exception:
+                     pass

          self.path.reset_metadata(
              is_file=True,
              is_dir=False,
-             size=len(data),
-             mtime=time.time()
+             size=size,
+             mtime=time.time(),
          )

-         return self
+         LOGGER.info("Written %s bytes in %s", size or "all", self.path)
+
+         return self  # or return result if your API prefers that


  class DatabricksDBFSIO(DatabricksIO):
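Note on the io.py hunks above: write_all_bytes now accepts bytes-likes or binary streams and sizes the payload up front (os.fstat, then seek/tell, then a spooled copy for non-seekable streams) before handing a single rewindable BinaryIO to client.upload. A standalone, stdlib-only sketch of that sizing order follows; it is an illustration of the strategy, not the module's exact helper:

    import io
    import os
    from tempfile import SpooledTemporaryFile
    from typing import BinaryIO, Union

    def probe_size(data: Union[bytes, bytearray, memoryview, BinaryIO]) -> int:
        # bytes-like: length is immediate
        if isinstance(data, (bytes, bytearray, memoryview)):
            return len(bytes(data))
        # real file on disk: ask the OS
        try:
            return os.fstat(data.fileno()).st_size
        except (OSError, AttributeError):
            pass
        # seekable stream: seek to the end, record, rewind
        try:
            data.seek(0, io.SEEK_END)
            end = data.tell()
            data.seek(0)
            return end
        except OSError:
            pass
        # non-seekable stream: spool it fully while counting bytes
        spooled = SpooledTemporaryFile(max_size=64 * 1024 * 1024, mode="w+b")
        size = 0
        while True:
            chunk = data.read(8 * 1024 * 1024)
            if not chunk:
                break
            spooled.write(chunk)
            size += len(chunk)
        return size

    print(probe_size(b"abc"))                  # 3
    print(probe_size(io.BytesIO(b"abcdef")))   # 6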
yggdrasil/databricks/workspaces/path.py CHANGED
@@ -1,8 +1,6 @@
  """Databricks path abstraction spanning DBFS, workspace, and volumes."""

  # src/yggdrasil/databricks/workspaces/databricks_path.py
- from __future__ import annotations
-
  import dataclasses
  import datetime as dt
  import io
@@ -15,9 +13,7 @@ from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Any, IO
  import dill
  import pyarrow as pa
- import pyarrow.dataset as ds
  from pyarrow import ArrowInvalid
- from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
  from pyarrow.fs import FileInfo, FileType, FileSystem


  from .io import DatabricksIO
@@ -25,12 +21,9 @@ from .path_kind import DatabricksPathKind
  from .volumes_path import get_volume_status, get_volume_metadata
  from ...libs.databrickslib import databricks
  from ...libs.pandaslib import PandasDataFrame
- from ...libs.polarslib import polars, PolarsDataFrame
- from ...types.cast.cast_options import CastOptions
- from ...types.cast.pandas_cast import pandas_converter, cast_pandas_dataframe
- from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
- from ...types.cast.registry import convert, register_converter
- from ...types.file_format import ExcelFileFormat
+ from ...libs.polarslib import polars
+ from ...types.cast.registry import convert
+ from ...types.file_format import FileFormat, ExcelFileFormat, ParquetFileFormat, JsonFileFormat, CsvFileFormat

  if databricks is not None:
      from databricks.sdk.errors import InternalError
@@ -1305,6 +1298,8 @@ class DatabricksPath:
          Returns:
              A PyArrow Dataset instance.
          """
+         import pyarrow.dataset as ds
+
          filesystem = self.filesystem(workspace=workspace) if filesystem is None else filesystem

          return ds.dataset(
@@ -1684,32 +1679,3 @@ class DatabricksPath:
          raise ValueError(
              "Invalid engine %s, must be in duckdb, polars" % engine
          )
-
- if databricks is not None:
-     @register_converter(DatabricksPath, ds.Dataset)
-     def databricks_path_to_arrow_table(
-         data: DatabricksPath,
-         options: Optional[CastOptions] = None,
-     ) -> ds.Dataset:
-         return data.arrow_dataset()
-
-
-     @pandas_converter(DatabricksPath, PandasDataFrame)
-     def databricks_path_to_pandas(
-         data: DatabricksPath,
-         options: Optional[CastOptions] = None,
-     ) -> PolarsDataFrame:
-         return cast_pandas_dataframe(
-             data.read_pandas(),
-             options
-         )
-
-     @polars_converter(DatabricksPath, PolarsDataFrame)
-     def databricks_path_to_polars(
-         data: DatabricksPath,
-         options: Optional[CastOptions] = None,
-     ) -> PolarsDataFrame:
-         return cast_polars_dataframe(
-             data.read_polars(),
-             options
-         )
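Note on the workspace.py hunks that follow: temp-path lifetime bookkeeping switches from milliseconds to whole epoch seconds, and cleanup no longer swallows arbitrary exceptions; it only inspects entries whose name starts with "tmp" and parses the trailing start/end stamps. A small runnable sketch of that arithmetic; the exact tmp-<start>-<end> name layout is inferred from the parsing code in the diff, so treat it as an assumption:

    import time

    max_lifetime = 3600                        # seconds, as engine.py now passes explicitly
    start = int(time.time())
    end = max(0, start + int(max_lifetime))
    name = f"tmp-{start}-{end}"                # assumed layout, e.g. tmp-1700000000-1700003600

    parts = name.split("-")
    if len(parts) > 2 and parts[0] == "tmp" and parts[1].isdigit() and parts[2].isdigit():
        expired = time.time() > int(parts[2])  # cleanup removes the entry once past its end stamp
        print(name, "expired:", expired)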
yggdrasil/databricks/workspaces/workspace.py CHANGED
@@ -520,9 +520,9 @@ class Workspace:
          Returns:
              A DatabricksPath pointing at the shared cache location.
          """
-         start = int(time.time() * 1000)
-         max_lifetime = max_lifetime or 48.0 * 3600.0
-         end = int(start + max_lifetime)
+         start = int(time.time())
+         max_lifetime = int(max_lifetime or 48 * 3600)
+         end = max(0, int(start + max_lifetime))

          base_path = base_path or self._base_tmp_path(
              catalog_name=catalog_name,
@@ -575,19 +575,15 @@ class Workspace:
              base_path
          )

-         try:
-             for path in base_path.ls(recursive=False, allow_not_found=True):
+         for path in base_path.ls(recursive=False, allow_not_found=True):
+             if path.name.startswith("tmp"):
                  parts = path.name.split("-")

                  if len(parts) > 2 and parts[0] == "tmp" and parts[1].isdigit() and parts[2].isdigit():
-                     end = int(parts[2]) / 1000.0
+                     end = int(parts[2])

                      if end and time.time() > end:
                          path.remove(recursive=True)
-         except Exception as e:
-             if raise_error:
-                 raise e
-             LOGGER.warning(e)

          LOGGER.info(
              "Cleaned temp path %s",
yggdrasil/libs/databrickslib.py CHANGED
@@ -1,5 +1,5 @@
  """Optional Databricks SDK dependency helpers."""
- from yggdrasil.types.dummy_class import DummyModuleClass
+ from ..types.dummy_class import DummyModuleClass


  class DatabricksDummyClass(DummyModuleClass):
@@ -25,7 +25,7 @@ def require_databricks_sdk():

  try:
      import databricks
-     import databricks.sdk  # type: ignore
+     import databricks.sdk

      from databricks.sdk import WorkspaceClient

@@ -34,7 +34,6 @@ try:
  except ImportError:
      databricks = DatabricksDummyClass
      databricks_sdk = DatabricksDummyClass
-
      WorkspaceClient = DatabricksDummyClass


yggdrasil/pyutils/modules.py CHANGED
@@ -42,7 +42,7 @@ MODULE_PROJECT_NAMES_ALIASES = {
      "yggdrasil": "ygg",
      "jwt": "PyJWT",
  }
-
+ DEFAULT_PIP_INDEX_SETTINGS = None

  def module_name_to_project_name(module_name: str) -> str:
      """Map module import names to PyPI project names when they differ.
@@ -264,6 +264,11 @@ class PipIndexSettings:
          Returns:
              Default PipIndexSettings instance.
          """
+         global DEFAULT_PIP_INDEX_SETTINGS
+
+         if DEFAULT_PIP_INDEX_SETTINGS is None:
+             DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
+
          return DEFAULT_PIP_INDEX_SETTINGS

      @property
@@ -363,9 +368,3 @@ def get_pip_index_settings() -> PipIndexSettings:
              extra_index_urls.append(u)

      return PipIndexSettings(index_url=index_url, extra_index_urls=extra_index_urls, sources=sources)
-
-
- try:
-     DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
- except:
-     DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()
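Note on the modules.py hunks above: DEFAULT_PIP_INDEX_SETTINGS is no longer computed at import time; it starts as None and the accessor fills it in on first use. A generic sketch of that lazy module-global pattern, with illustrative names rather than the library's API:

    _DEFAULT = None

    def probe_settings() -> dict:
        # stands in for get_pip_index_settings(); pretend this is expensive
        return {"index_url": "https://pypi.org/simple"}

    def default_settings() -> dict:
        global _DEFAULT
        if _DEFAULT is None:          # first call pays the cost; importing the module stays cheap
            _DEFAULT = probe_settings()
        return _DEFAULT

    print(default_settings() is default_settings())   # True: the probe runs once and is cached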
yggdrasil/pyutils/python_env.py CHANGED
@@ -27,7 +27,6 @@ log = logging.getLogger(__name__)

  class PythonEnvError(RuntimeError):
      """Raised when Python environment operations fail."""
-
      pass


@@ -72,6 +71,9 @@ _NON_PIPABLE_RE = re.compile(
      re.IGNORECASE,
  )

+ # Snapshot singleton (import-time)
+ CURRENT_PYTHON_ENV: "PythonEnv" = None
+


  def _filter_non_pipable_linux_packages(requirements: Iterable[str]) -> List[str]:
@@ -1508,11 +1510,3 @@ print("RESULT:" + json.dumps(top_level))""".strip()
          log.error("python_env CLI error: %s", e)
          print(f"ERROR: {e}", file=sys.stderr)
          return 2
-
-
- # Snapshot singleton (import-time)
- CURRENT_PYTHON_ENV: PythonEnv = None
-
-
- if __name__ == "__main__":
-     raise SystemExit(PythonEnv.cli())
yggdrasil/requests/msal.py CHANGED
@@ -3,12 +3,8 @@
  # auth_session.py
  import os
  import time
- from typing import Any, Mapping, Optional
-
- import urllib3
-
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
- from dataclasses import dataclass
+ from dataclasses import dataclass, field
+ from typing import Any, Optional

  from .session import YGGSession

@@ -38,11 +34,11 @@ class MSALAuth:
          authority: Optional authority URL override.
          scopes: List of scopes to request.
      """
-     tenant_id: Optional[str] = None
-     client_id: Optional[str] = None
-     client_secret: Optional[str] = None
-     authority: Optional[str] = None
-     scopes: list[str] | None = None
+     tenant_id: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_TENANT_ID"))
+     client_id: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_CLIENT_ID"))
+     client_secret: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_CLIENT_SECRET"))
+     authority: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_AUTHORITY"))
+     scopes: list[str] | None = field(default_factory=lambda: os.environ.get("AZURE_SCOPES"))

      _auth_app: ConfidentialClientApplication | None = None
      _expires_at: float | None = None
@@ -77,97 +73,15 @@ class MSALAuth:
          Returns:
              None.
          """
-         self.tenant_id = self.tenant_id or os.environ.get("AZURE_TENANT_ID")
-         self.client_id = self.client_id or os.environ.get("AZURE_CLIENT_ID")
-         self.client_secret = self.client_secret or os.environ.get("AZURE_CLIENT_SECRET")
-
-         self.authority = self.authority or os.environ.get("AZURE_AUTHORITY")
          if not self.authority:
+             assert self.tenant_id, "tenant_id is required to build authority URL"
+
              self.authority = f"https://login.microsoftonline.com/{self.tenant_id}"

-         self.scopes = self.scopes or os.environ.get("AZURE_SCOPES")
          if self.scopes:
              if isinstance(self.scopes, str):
                  self.scopes = self.scopes.split(",")

-         self._validate_config()
-
-     def _validate_config(self):
-         """Validate that all required configuration is present.
-
-         Returns:
-             None.
-         """
-         missing = []
-
-         if not self.client_id:
-             missing.append("azure_client_id (AZURE_CLIENT_ID)")
-         if not self.client_secret:
-             missing.append("azure_client_secret (AZURE_CLIENT_SECRET)")
-         if not self.tenant_id:
-             missing.append("azure_client_secret (AZURE_TENANT_ID)")
-         if not self.scopes:
-             missing.append("scopes (AZURE_SCOPES)")
-
-         if missing:
-             raise ValueError(f"Missing required configuration: {', '.join(missing)}")
-
-     @classmethod
-     def find_in_env(
-         cls,
-         env: Mapping = None,
-         prefix: Optional[str] = None
-     ) -> "MSALAuth":
-         """Return an MSALAuth built from environment variables if available.
-
-         Args:
-             env: Mapping to read variables from; defaults to os.environ.
-             prefix: Optional prefix for variable names.
-
-         Returns:
-             A configured MSALAuth instance or None.
-         """
-         if not env:
-             env = os.environ
-         prefix = prefix or "AZURE_"
-
-         required = {
-             key: env.get(prefix + key.upper())
-             for key in (
-                 "client_id", "client_secret", "tenant_id", "scopes"
-             )
-         }
-
-         if all(required.values()):
-             scopes = required["scopes"].split(",") if required["scopes"] else None
-             return MSALAuth(
-                 tenant_id=required["tenant_id"],
-                 client_id=required["client_id"],
-                 client_secret=required["client_secret"],
-                 scopes=scopes,
-                 authority=env.get(prefix + "AUTHORITY"),
-             )
-
-         return None
-
-     def export_to(self, to: dict = os.environ):
-         """Export the auth configuration to the provided mapping.
-
-         Args:
-             to: Mapping to populate with auth configuration values.
-
-         Returns:
-             None.
-         """
-         for key, value in (
-             ("AZURE_CLIENT_ID", self.client_id),
-             ("AZURE_CLIENT_SECRET", self.client_secret),
-             ("AZURE_AUTHORITY", self.authority),
-             ("AZURE_SCOPES", ",".join(self.scopes)),
-         ):
-             if value:
-                 to[key] = value
-
      @property
      def auth_app(self) -> ConfidentialClientApplication:
@@ -298,7 +212,6 @@ class MSALSession(YGGSession):
          super().__init__(*args, **kwargs)
          self.msal_auth = msal_auth

-
      def prepare_request(self, request):
          """Prepare the request with an Authorization header when needed.

yggdrasil/types/file_format.py CHANGED
@@ -1,8 +1,12 @@
- from pyarrow.dataset import FileFormat
+ from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat


  __all__ = [
-     "ExcelFileFormat"
+     "FileFormat",
+     "ExcelFileFormat",
+     "ParquetFileFormat",
+     "CsvFileFormat",
+     "JsonFileFormat"
  ]


yggdrasil/types/python_defaults.py CHANGED
@@ -18,84 +18,96 @@ __all__ = [
      "default_arrow_array"
  ]

+ DEFAULT_MAPS_INITIALIZED = False

  _NONE_TYPE = type(None)
- _PRIMITIVE_DEFAULTS = {
-     str: "",
-     int: 0,
-     float: 0.0,
-     bool: False,
-     bytes: b"",
- }
-
- _SPECIAL_DEFAULTS = {
-     datetime.datetime: lambda: datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
-     datetime.date: lambda: datetime.date(1970, 1, 1),
-     datetime.time: lambda: datetime.time(0, 0, 0, tzinfo=datetime.timezone.utc),
-     datetime.timedelta: lambda: datetime.timedelta(0),
-     uuid.UUID: lambda: uuid.UUID(int=0),
-     decimal.Decimal: lambda: decimal.Decimal(0),
- }
-
- _ARROW_DEFAULTS = {
-     pa.null(): pa.scalar(None, type=pa.null()),
-
-     pa.bool_(): pa.scalar(False, type=pa.bool_()),
-
-     pa.int8(): pa.scalar(0, type=pa.int8()),
-     pa.int16(): pa.scalar(0, type=pa.int16()),
-     pa.int32(): pa.scalar(0, type=pa.int32()),
-     pa.int64(): pa.scalar(0, type=pa.int64()),
-
-     pa.uint8(): pa.scalar(0, type=pa.uint8()),
-     pa.uint16(): pa.scalar(0, type=pa.uint16()),
-     pa.uint32(): pa.scalar(0, type=pa.uint32()),
-     pa.uint64(): pa.scalar(0, type=pa.uint64()),
-
-     # pa.float16(): pa.scalar(0.0, type=pa.float16()),
-     pa.float32(): pa.scalar(0.0, type=pa.float32()),
-     pa.float64(): pa.scalar(0.0, type=pa.float64()),
-
-     pa.string(): pa.scalar("", type=pa.string()),
-     pa.string_view(): pa.scalar("", type=pa.string_view()),
-     pa.large_string(): pa.scalar("", type=pa.large_string()),
-
-     pa.binary(): pa.scalar(b"", type=pa.binary()),
-     pa.binary_view(): pa.scalar(b"", type=pa.binary_view()),
-     pa.large_binary(): pa.scalar(b"", type=pa.large_binary()),
- }
-
-
- try:
-     import polars
-
-     polars = polars
-
-     _POLARS_DEFAULTS = {
-         polars.Null(): None,
-         polars.Boolean(): False,
-
-         polars.Binary(): b"",
-
-         polars.Utf8(): "",
-
-         polars.Int8(): 0,
-         polars.Int16(): 0,
-         polars.Int32(): 0,
-         polars.Int64(): 0,
-
-         polars.UInt8(): 0,
-         polars.UInt16(): 0,
-         polars.UInt32(): 0,
-         polars.UInt64(): 0,
-
-         polars.Float32(): 0.0,
-         polars.Float64(): 0.0,
-     }
- except ImportError:
-     polars = None
-
-     _POLARS_DEFAULTS = {}
+ _ARROW_DEFAULTS = {}
+ _POLARS_DEFAULTS = {}
+ _PRIMITIVE_DEFAULTS = {}
+ _SPECIAL_DEFAULTS = {}
+
+
+ def ensure_default_maps_initialized():
+     global DEFAULT_MAPS_INITIALIZED
+     global _PRIMITIVE_DEFAULTS
+     global _SPECIAL_DEFAULTS
+     global _ARROW_DEFAULTS
+     global _POLARS_DEFAULTS
+
+     if not DEFAULT_MAPS_INITIALIZED:
+         _PRIMITIVE_DEFAULTS = {
+             str: "",
+             int: 0,
+             float: 0.0,
+             bool: False,
+             bytes: b"",
+         }
+
+         _SPECIAL_DEFAULTS = {
+             datetime.datetime: lambda: datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
+             datetime.date: lambda: datetime.date(1970, 1, 1),
+             datetime.time: lambda: datetime.time(0, 0, 0, tzinfo=datetime.timezone.utc),
+             datetime.timedelta: lambda: datetime.timedelta(0),
+             uuid.UUID: lambda: uuid.UUID(int=0),
+             decimal.Decimal: lambda: decimal.Decimal(0),
+         }
+
+         _ARROW_DEFAULTS = {
+             pa.null(): pa.scalar(None, type=pa.null()),
+
+             pa.bool_(): pa.scalar(False, type=pa.bool_()),
+
+             pa.int8(): pa.scalar(0, type=pa.int8()),
+             pa.int16(): pa.scalar(0, type=pa.int16()),
+             pa.int32(): pa.scalar(0, type=pa.int32()),
+             pa.int64(): pa.scalar(0, type=pa.int64()),
+
+             pa.uint8(): pa.scalar(0, type=pa.uint8()),
+             pa.uint16(): pa.scalar(0, type=pa.uint16()),
+             pa.uint32(): pa.scalar(0, type=pa.uint32()),
+             pa.uint64(): pa.scalar(0, type=pa.uint64()),
+
+             # pa.float16(): pa.scalar(0.0, type=pa.float16()),
+             pa.float32(): pa.scalar(0.0, type=pa.float32()),
+             pa.float64(): pa.scalar(0.0, type=pa.float64()),
+
+             pa.string(): pa.scalar("", type=pa.string()),
+             pa.string_view(): pa.scalar("", type=pa.string_view()),
+             pa.large_string(): pa.scalar("", type=pa.large_string()),
+
+             pa.binary(): pa.scalar(b"", type=pa.binary()),
+             pa.binary_view(): pa.scalar(b"", type=pa.binary_view()),
+             pa.large_binary(): pa.scalar(b"", type=pa.large_binary()),
+         }
+
+         try:
+             import polars
+
+             _POLARS_DEFAULTS = {
+                 polars.Null(): None,
+                 polars.Boolean(): False,
+
+                 polars.Binary(): b"",
+
+                 polars.Utf8(): "",
+
+                 polars.Int8(): 0,
+                 polars.Int16(): 0,
+                 polars.Int32(): 0,
+                 polars.Int64(): 0,
+
+                 polars.UInt8(): 0,
+                 polars.UInt16(): 0,
+                 polars.UInt32(): 0,
+                 polars.UInt64(): 0,
+
+                 polars.Float32(): 0.0,
+                 polars.Float64(): 0.0,
+             }
+         except ImportError:
+             pass
+
+         DEFAULT_MAPS_INITIALIZED = True

  def _is_optional(hint) -> bool:
      """Return True when the type hint is Optional.
@@ -199,6 +211,8 @@ def default_arrow_scalar(
      Returns:
          Arrow scalar default.
      """
+     ensure_default_maps_initialized()
+
      if nullable:
          return pa.scalar(None, type=dtype)

@@ -307,6 +321,8 @@ def default_python_scalar(hint: Any):
      if _is_optional(hint):
          return None

+     ensure_default_maps_initialized()
+
      if hint in _PRIMITIVE_DEFAULTS:
          return _PRIMITIVE_DEFAULTS[hint]

yggdrasil/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.60"
+ __version__ = "0.1.65"