ygg 0.1.60__tar.gz → 0.1.64__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. {ygg-0.1.60 → ygg-0.1.64}/PKG-INFO +2 -2
  2. {ygg-0.1.60 → ygg-0.1.64}/pyproject.toml +2 -2
  3. {ygg-0.1.60 → ygg-0.1.64}/src/ygg.egg-info/PKG-INFO +2 -2
  4. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/ai/session.py +1 -3
  5. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/sql/engine.py +24 -12
  6. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/sql/warehouse.py +8 -0
  7. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/io.py +108 -43
  8. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/path.py +5 -39
  9. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/workspace.py +6 -10
  10. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/databrickslib.py +2 -3
  11. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/modules.py +6 -7
  12. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/python_env.py +3 -9
  13. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/requests/msal.py +9 -96
  14. ygg-0.1.64/src/yggdrasil/types/file_format.py +14 -0
  15. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/python_defaults.py +92 -76
  16. ygg-0.1.64/src/yggdrasil/version.py +1 -0
  17. ygg-0.1.60/src/yggdrasil/types/file_format.py +0 -10
  18. ygg-0.1.60/src/yggdrasil/version.py +0 -1
  19. {ygg-0.1.60 → ygg-0.1.64}/LICENSE +0 -0
  20. {ygg-0.1.60 → ygg-0.1.64}/README.md +0 -0
  21. {ygg-0.1.60 → ygg-0.1.64}/setup.cfg +0 -0
  22. {ygg-0.1.60 → ygg-0.1.64}/src/ygg.egg-info/SOURCES.txt +0 -0
  23. {ygg-0.1.60 → ygg-0.1.64}/src/ygg.egg-info/dependency_links.txt +0 -0
  24. {ygg-0.1.60 → ygg-0.1.64}/src/ygg.egg-info/entry_points.txt +0 -0
  25. {ygg-0.1.60 → ygg-0.1.64}/src/ygg.egg-info/requires.txt +0 -0
  26. {ygg-0.1.60 → ygg-0.1.64}/src/ygg.egg-info/top_level.txt +0 -0
  27. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/__init__.py +0 -0
  28. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/ai/__init__.py +0 -0
  29. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/ai/sql_session.py +0 -0
  30. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/__init__.py +0 -0
  31. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/compute/__init__.py +0 -0
  32. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/compute/cluster.py +0 -0
  33. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/compute/command_execution.py +0 -0
  34. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/compute/exceptions.py +0 -0
  35. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/compute/execution_context.py +0 -0
  36. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/compute/remote.py +0 -0
  37. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/exceptions.py +0 -0
  38. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
  39. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/jobs/config.py +0 -0
  40. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/sql/__init__.py +0 -0
  41. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
  42. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/sql/statement_result.py +0 -0
  43. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/sql/types.py +0 -0
  44. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
  45. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/filesytem.py +0 -0
  46. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/path_kind.py +0 -0
  47. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/volumes_path.py +0 -0
  48. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/dataclasses/__init__.py +0 -0
  49. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/dataclasses/dataclass.py +0 -0
  50. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/exceptions.py +0 -0
  51. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/__init__.py +0 -0
  52. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/extensions/__init__.py +0 -0
  53. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
  54. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
  55. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/pandaslib.py +0 -0
  56. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/polarslib.py +0 -0
  57. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/sparklib.py +0 -0
  58. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/__init__.py +0 -0
  59. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/callable_serde.py +0 -0
  60. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/equality.py +0 -0
  61. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/exceptions.py +0 -0
  62. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/expiring_dict.py +0 -0
  63. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/mimetypes.py +0 -0
  64. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/parallel.py +0 -0
  65. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/retry.py +0 -0
  66. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/waiting_config.py +0 -0
  67. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/requests/__init__.py +0 -0
  68. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/requests/session.py +0 -0
  69. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/__init__.py +0 -0
  70. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/__init__.py +0 -0
  71. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
  72. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/cast_options.py +0 -0
  73. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
  74. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/polars_cast.py +0 -0
  75. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
  76. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/registry.py +0 -0
  77. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/spark_cast.py +0 -0
  78. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
  79. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
  80. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/dummy_class.py +0 -0
  81. {ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/python_arrow.py +0 -0
{ygg-0.1.60 → ygg-0.1.64}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ygg
- Version: 0.1.60
+ Version: 0.1.64
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
  Author: Yggdrasil contributors
  License: Apache License
@@ -207,7 +207,7 @@ License: Apache License
 
  Project-URL: Homepage, https://github.com/Platob/Yggdrasil
  Project-URL: Repository, https://github.com/Platob/Yggdrasil
- Project-URL: Documentation, https://github.com/Platob/Yggdrasil/tree/main/python/docs
+ Project-URL: Documentation, https://github.com/Platob/Yggdrasil
  Keywords: arrow,polars,pandas,spark,databricks,typing,dataclass,serialization
  Classifier: Development Status :: 3 - Alpha
  Classifier: Programming Language :: Python
{ygg-0.1.60 → ygg-0.1.64}/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "ygg"
- version = "0.1.60"
+ version = "0.1.64"
  description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
  readme = { file = "README.md", content-type = "text/markdown" }
  license = { file = "LICENSE" }
@@ -51,7 +51,7 @@ yggenv = "yggdrasil.pyutils.python_env:PythonEnv.cli"
  [project.urls]
  Homepage = "https://github.com/Platob/Yggdrasil"
  Repository = "https://github.com/Platob/Yggdrasil"
- Documentation = "https://github.com/Platob/Yggdrasil/tree/main/python/docs"
+ Documentation = "https://github.com/Platob/Yggdrasil"
 
  [tool.setuptools]
  package-dir = { "" = "src" }
{ygg-0.1.60 → ygg-0.1.64}/src/ygg.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ygg
- Version: 0.1.60
+ Version: 0.1.64
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
  Author: Yggdrasil contributors
  License: Apache License
@@ -207,7 +207,7 @@ License: Apache License
 
  Project-URL: Homepage, https://github.com/Platob/Yggdrasil
  Project-URL: Repository, https://github.com/Platob/Yggdrasil
- Project-URL: Documentation, https://github.com/Platob/Yggdrasil/tree/main/python/docs
+ Project-URL: Documentation, https://github.com/Platob/Yggdrasil
  Keywords: arrow,polars,pandas,spark,databricks,typing,dataclass,serialization
  Classifier: Development Status :: 3 - Alpha
  Classifier: Programming Language :: Python
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/ai/session.py
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
  from dataclasses import dataclass, field
  from typing import Dict, List, Optional
 
- from yggdrasil.types.dummy_class import DummyModuleClass
+ from ..types.dummy_class import DummyModuleClass
 
  try:
      from openai import OpenAI
@@ -19,8 +19,6 @@ __all__ = ["AISession"]
  class AISession(ABC):
      api_key: str
      base_url: str
-
-     # Gemini default (via OpenAI-compatible gateway)
      model: str = "gemini-2.5-flash"
 
      client: OpenAI = field(init=False)
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/sql/engine.py
@@ -17,7 +17,7 @@ import random
  import string
  import time
  from threading import Thread
- from typing import Optional, Union, Any, Dict, List, Literal
+ from typing import Optional, Union, Any, Dict, List, Literal, TYPE_CHECKING
 
  import pyarrow as pa
  import pyarrow.dataset as pds
@@ -26,11 +26,10 @@ from .statement_result import StatementResult
  from .types import column_info_to_arrow_field
  from .warehouse import SQLWarehouse
  from ..workspaces import WorkspaceService, DatabricksPath
- from ...ai.sql_session import SQLAISession, SQLFlavor
  from ...libs.databrickslib import databricks_sdk, DatabricksDummyClass
  from ...libs.sparklib import SparkSession, SparkDataFrame, pyspark
  from ...pyutils.waiting_config import WaitingConfigArg
- from ...types import is_arrow_type_string_like, is_arrow_type_binary_like
+ from ...types import is_arrow_type_string_like, is_arrow_type_binary_like, cast_arrow_tabular
  from ...types.cast.cast_options import CastOptions
  from ...types.cast.registry import convert
  from ...types.cast.spark_cast import cast_spark_dataframe
@@ -63,6 +62,10 @@ if pyspark is not None:
      import pyspark.sql.functions as F
 
 
+ if TYPE_CHECKING:
+     from ...ai.sql_session import SQLAISession, SQLFlavor
+
+
  __all__ = [
      "SQLEngine",
      "StatementResult"
@@ -101,7 +104,7 @@ class SQLEngine(WorkspaceService):
      schema_name: Optional[str] = None
 
      _warehouse: Optional[SQLWarehouse] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
-     _ai_session: Optional[SQLAISession] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
+     _ai_session: Optional["SQLAISession"] = dataclasses.field(default=None, repr=False, hash=False, compare=False)
 
      def table_full_name(
          self,
@@ -198,8 +201,13 @@ class SQLEngine(WorkspaceService):
      def ai_session(
          self,
          model: str = "databricks-gemini-2-5-pro",
-         flavor: SQLFlavor = SQLFlavor.DATABRICKS
+         flavor: Optional["SQLFlavor"] = None
      ):
+         from ...ai.sql_session import SQLAISession, SQLFlavor
+
+         if flavor is None:
+             flavor = SQLFlavor.DATABRICKS
+
          return SQLAISession(
              model=model,
              api_key=self.workspace.current_token(),
@@ -224,7 +232,7 @@ class SQLEngine(WorkspaceService):
          catalog_name: Optional[str] = None,
          schema_name: Optional[str] = None,
          wait: Optional[WaitingConfigArg] = True
-     ) -> "StatementResult":
+     ) -> StatementResult:
          """Execute a SQL statement via Spark or Databricks SQL Statement Execution API.
 
          Engine resolution:
@@ -504,10 +512,13 @@ class SQLEngine(WorkspaceService):
                  logger.exception("Failed to drop table %s after auto creation error", location)
                  raise
 
-         data_tbl = convert(
-             data, pa.Table,
-             options=cast_options, target_field=existing_schema
-         )
+         cast_options = CastOptions.check_arg(options=cast_options, target_field=existing_schema)
+
+         if isinstance(data, (pa.Table, pa.RecordBatch)):
+             data_tbl = cast_arrow_tabular(data, options=cast_options)
+         else:
+             data_tbl = convert(data, pa.Table, options=cast_options)
+
          num_rows = data_tbl.num_rows
 
          logger.debug(
@@ -524,7 +535,8 @@
              catalog_name=catalog_name,
              schema_name=schema_name,
              volume_name="tmp",
-             extension="parquet"
+             extension="parquet",
+             max_lifetime=3600,
          ) if temp_volume_path is None else DatabricksPath.parse(obj=temp_volume_path, workspace=connected.workspace)
 
          logger.debug("Staging Parquet to temp volume: %s", temp_volume_path)
@@ -575,7 +587,7 @@ FROM parquet.`{temp_volume_path}`"""
          finally:
              try:
                  Thread(
-                     target=temp_volume_path.rmdir,
+                     target=temp_volume_path.remove,
                      kwargs={
                          "recursive": True
                      }
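
Illustrative note (not part of the diff): the engine.py changes follow the standard deferred-import pattern, where the SQLAISession types are only imported for annotations under TYPE_CHECKING and re-imported lazily inside the method that needs them. A minimal self-contained sketch of that pattern, with hypothetical names:

    from typing import TYPE_CHECKING, Optional

    if TYPE_CHECKING:
        # Only evaluated by static type checkers, never at runtime.
        from heavy_module import HeavySession  # hypothetical module

    class Engine:
        _session: Optional["HeavySession"] = None  # string annotation, no runtime import

        def session(self) -> "HeavySession":
            # Import inside the method so module import stays cheap and
            # circular imports are avoided.
            from heavy_module import HeavySession
            if self._session is None:
                self._session = HeavySession()
            return self._session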
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/sql/warehouse.py
@@ -256,6 +256,7 @@ class SQLWarehouse(WorkspaceService):
          elif self.warehouse_id:
              return self
 
+         starter_warehouse, starter_name = None, "Serverless Starter Warehouse"
          warehouse_name = warehouse_name or self.warehouse_name or self._make_default_name(enable_serverless_compute=True)
 
          if warehouse_name:
@@ -284,8 +285,15 @@
                      warehouse_name=warehouse_name,
                      warehouse_id=warehouse.warehouse_id
                  )
+
                  return warehouse
 
+             elif warehouse.warehouse_name == starter_warehouse:
+                 starter_warehouse = warehouse
+
+         if starter_warehouse is not None:
+             return starter_warehouse
+
          if raise_error:
              v = warehouse_name or warehouse_id
 
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/io.py
@@ -3,28 +3,25 @@
  import base64
  import io
  import logging
+ import os
  import time
  from abc import ABC, abstractmethod
+ from tempfile import SpooledTemporaryFile
  from threading import Thread
- from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any
+ from typing import TYPE_CHECKING, Optional, IO, AnyStr, Union, Any, BinaryIO
 
  import dill
  import pyarrow as pa
  import pyarrow.csv as pcsv
  import pyarrow.parquet as pq
- from pyarrow.dataset import (
-     FileFormat,
-     ParquetFileFormat,
-     CsvFileFormat,
- )
 
  from .path_kind import DatabricksPathKind
  from ...libs.databrickslib import databricks
  from ...libs.pandaslib import PandasDataFrame
  from ...libs.polarslib import polars, PolarsDataFrame
- from ...pyutils import retry
+ from ...pyutils.retry import retry
  from ...types.cast.registry import convert
- from ...types.file_format import ExcelFileFormat
+ from ...types.file_format import FileFormat, ParquetFileFormat, CsvFileFormat, ExcelFileFormat
 
  if databricks is not None:
      from databricks.sdk.service.workspace import ImportFormat, ExportFormat
@@ -45,7 +42,64 @@ __all__ = [
 
 
  LOGGER = logging.getLogger(__name__)
+ _SPOOL_MAX = 64 * 1024 * 1024  # 64MB in RAM then spill to disk
+ _COPY_CHUNK = 8 * 1024 * 1024  # 8MB chunks
+
+ def _prepare_binaryio_and_size(
+     data: Union[bytes, bytearray, memoryview, BinaryIO]
+ ) -> tuple[int, BinaryIO, bool]:
+     """
+     Returns (size, bio, should_close).
+
+     - bytes-like -> wrap in BytesIO (closeable by us).
+     - seekable file -> compute size via fstat or seek/tell.
+     - non-seekable stream -> spool into SpooledTemporaryFile, count bytes.
+     """
+     # bytes-like
+     if isinstance(data, (bytes, bytearray, memoryview)):
+         b = bytes(data)
+         return len(b), io.BytesIO(b), True
+
+     f: BinaryIO = data
+
+     # 1) try OS-level size for real files
+     try:
+         fileno = f.fileno()  # type: ignore[attr-defined]
+     except Exception:
+         fileno = None
+
+     if fileno is not None:
+         try:
+             st = os.fstat(fileno)
+             # rewind if possible
+             try:
+                 f.seek(0)
+             except Exception:
+                 pass
+             return int(st.st_size), f, False
+         except Exception:
+             pass
+
+     # 2) try seek/tell (seekable streams)
+     try:
+         f.seek(0, io.SEEK_END)
+         end = f.tell()
+         f.seek(0)
+         return int(end), f, False
+     except Exception:
+         pass
 
+     # 3) non-seekable stream: spool + count
+     spooled = SpooledTemporaryFile(max_size=_SPOOL_MAX, mode="w+b")
+     size = 0
+     while True:
+         chunk = f.read(_COPY_CHUNK)
+         if not chunk:
+             break
+         spooled.write(chunk)
+         size += len(chunk)
+     spooled.seek(0)
+     return size, spooled, True
 
  class DatabricksIO(ABC, IO):
      """File-like interface for Databricks workspace, volume, or DBFS paths."""
@@ -102,7 +156,10 @@ class DatabricksIO(ABC, IO):
          return self.path.__hash__()
 
      def __str__(self):
-         return self.path.__str__()
+         return "%s(path=%s)" % (
+             self.__class__.__name__,
+             self.path.__repr__()
+         )
 
      def __repr__(self):
          return "%s(path=%s)" % (
@@ -1081,9 +1138,9 @@ class DatabricksVolumeIO(DatabricksIO):
 
          try:
              resp = client.download(full_path)
-         except Exception as e:
+         except (NotFound, ResourceDoesNotExist, BadRequest, InternalError) as e:
              # Databricks SDK exceptions vary a bit by version; keep it pragmatic.
-             if allow_not_found and any(s in str(e).lower() for s in ("not found", "not exist", "404")):
+             if allow_not_found:
                  return b""
              raise
 
@@ -1096,53 +1153,61 @@
          end = start + length
          return data[start:end]
 
-     @retry(exceptions=(InternalError,))
-     def write_all_bytes(self, data: Union[bytes, IO[bytes]]):
-         """Write bytes to a volume file.
-
-         Args:
-             data: Union[bytes, IO[bytes]] to write.
-
-         Returns:
-             The DatabricksVolumeIO instance.
-         """
+     def write_all_bytes(
+         self,
+         data: Union[bytes, bytearray, memoryview, BinaryIO],
+         *,
+         overwrite: bool = True,
+         part_size: Optional[int] = None,
+         use_parallel: bool = True,
+         parallelism: Optional[int] = None,
+     ):
+         """Write bytes/stream to a volume file safely (BinaryIO upload)."""
          sdk = self.workspace.sdk()
         client = sdk.files
         full_path = self.path.files_full_path()
 
-         LOGGER.debug(
-             "Writing all bytes in %s",
-             self
-         )
+         LOGGER.debug("Writing all bytes in %s", self)
 
-         try:
-             client.upload(
-                 full_path,
-                 io.BytesIO(data),
-                 overwrite=True
-             )
-         except (NotFound, ResourceDoesNotExist, BadRequest):
-             self.path.parent.mkdir(parents=True, exist_ok=True)
+         size, bio, should_close = _prepare_binaryio_and_size(data)
 
-             client.upload(
+         def _upload():
+             return client.upload(
                  full_path,
-                 io.BytesIO(data),
-                 overwrite=True
+                 bio,
+                 overwrite=overwrite,
+                 part_size=part_size,
+                 use_parallel=use_parallel,
+                 parallelism=parallelism,
             )
 
-         LOGGER.info(
-             "Written all bytes in %s",
-             self
-         )
+         try:
+             _ = _upload()
+         except (NotFound, ResourceDoesNotExist, BadRequest, InternalError):
+             self.path.parent.mkdir(parents=True, exist_ok=True)
+             # Important: rewind if possible before retry
+             try:
+                 bio.seek(0)
+             except Exception:
+                 pass
+             _ = _upload()
+         finally:
+             if should_close:
+                 try:
+                     bio.close()
+                 except Exception:
+                     pass
 
         self.path.reset_metadata(
             is_file=True,
             is_dir=False,
-             size=len(data),
-             mtime=time.time()
+             size=size,
+             mtime=time.time(),
         )
 
-         return self
+         LOGGER.info("Written %s bytes in %s", size or "all", self.path)
+
+         return self  # or return result if your API prefers that
 
 
  class DatabricksDBFSIO(DatabricksIO):
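
Illustrative note (not part of the diff): the new upload path sizes the payload before calling client.upload. A minimal standalone sketch of the same size-probing idea (bytes wrapped in BytesIO, seekable stream measured via seek/tell, non-seekable stream spooled and counted), using only the standard library:

    import io
    from tempfile import SpooledTemporaryFile

    def probe_size(data):
        """Return (size, readable, should_close) for bytes or a binary stream."""
        if isinstance(data, (bytes, bytearray, memoryview)):
            buf = bytes(data)
            return len(buf), io.BytesIO(buf), True
        try:
            data.seek(0, io.SEEK_END)      # seekable stream: measure then rewind
            size = data.tell()
            data.seek(0)
            return size, data, False
        except (OSError, io.UnsupportedOperation):
            spooled = SpooledTemporaryFile(max_size=64 * 1024 * 1024, mode="w+b")
            size = 0
            while chunk := data.read(8 * 1024 * 1024):  # non-seekable: copy and count
                spooled.write(chunk)
                size += len(chunk)
            spooled.seek(0)
            return size, spooled, True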
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/path.py
@@ -1,8 +1,6 @@
  """Databricks path abstraction spanning DBFS, workspace, and volumes."""
 
  # src/yggdrasil/databricks/workspaces/databricks_path.py
- from __future__ import annotations
-
  import dataclasses
  import datetime as dt
  import io
@@ -15,9 +13,7 @@ from typing import Optional, Tuple, Union, TYPE_CHECKING, List, Any, IO
 
  import dill
  import pyarrow as pa
- import pyarrow.dataset as ds
  from pyarrow import ArrowInvalid
- from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
  from pyarrow.fs import FileInfo, FileType, FileSystem
 
  from .io import DatabricksIO
@@ -25,12 +21,9 @@ from .path_kind import DatabricksPathKind
  from .volumes_path import get_volume_status, get_volume_metadata
  from ...libs.databrickslib import databricks
  from ...libs.pandaslib import PandasDataFrame
- from ...libs.polarslib import polars, PolarsDataFrame
- from ...types.cast.cast_options import CastOptions
- from ...types.cast.pandas_cast import pandas_converter, cast_pandas_dataframe
- from ...types.cast.polars_cast import polars_converter, cast_polars_dataframe
- from ...types.cast.registry import convert, register_converter
- from ...types.file_format import ExcelFileFormat
+ from ...libs.polarslib import polars
+ from ...types.cast.registry import convert
+ from ...types.file_format import FileFormat, ExcelFileFormat, ParquetFileFormat, JsonFileFormat, CsvFileFormat
 
  if databricks is not None:
      from databricks.sdk.errors import InternalError
@@ -1305,6 +1298,8 @@ class DatabricksPath:
          Returns:
              A PyArrow Dataset instance.
          """
+         import pyarrow.dataset as ds
+
          filesystem = self.filesystem(workspace=workspace) if filesystem is None else filesystem
 
          return ds.dataset(
@@ -1684,32 +1679,3 @@
          raise ValueError(
              "Invalid engine %s, must be in duckdb, polars" % engine
          )
-
- if databricks is not None:
-     @register_converter(DatabricksPath, ds.Dataset)
-     def databricks_path_to_arrow_table(
-         data: DatabricksPath,
-         options: Optional[CastOptions] = None,
-     ) -> ds.Dataset:
-         return data.arrow_dataset()
-
-
-     @pandas_converter(DatabricksPath, PandasDataFrame)
-     def databricks_path_to_pandas(
-         data: DatabricksPath,
-         options: Optional[CastOptions] = None,
-     ) -> PolarsDataFrame:
-         return cast_pandas_dataframe(
-             data.read_pandas(),
-             options
-         )
-
-     @polars_converter(DatabricksPath, PolarsDataFrame)
-     def databricks_path_to_polars(
-         data: DatabricksPath,
-         options: Optional[CastOptions] = None,
-     ) -> PolarsDataFrame:
-         return cast_polars_dataframe(
-             data.read_polars(),
-             options
-         )
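
Illustrative note (not part of the diff): path.py now imports pyarrow.dataset only inside arrow_dataset(), so importing the module no longer pulls in the dataset machinery at load time. A minimal sketch of reading a directory of Parquet files through an explicit PyArrow filesystem (a local filesystem here, purely for illustration):

    import pyarrow.dataset as ds
    from pyarrow.fs import LocalFileSystem

    def read_parquet_dir(path: str):
        # format and filesystem are passed explicitly, mirroring how ds.dataset is used
        dataset = ds.dataset(path, format="parquet", filesystem=LocalFileSystem())
        return dataset.to_table()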
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/databricks/workspaces/workspace.py
@@ -520,9 +520,9 @@ class Workspace:
          Returns:
              A DatabricksPath pointing at the shared cache location.
          """
-         start = int(time.time() * 1000)
-         max_lifetime = max_lifetime or 48.0 * 3600.0
-         end = int(start + max_lifetime)
+         start = int(time.time())
+         max_lifetime = int(max_lifetime or 48 * 3600)
+         end = max(0, int(start + max_lifetime))
 
          base_path = base_path or self._base_tmp_path(
              catalog_name=catalog_name,
@@ -575,19 +575,15 @@
              base_path
          )
 
-         try:
-             for path in base_path.ls(recursive=False, allow_not_found=True):
+         for path in base_path.ls(recursive=False, allow_not_found=True):
+             if path.name.startswith("tmp"):
                  parts = path.name.split("-")
 
                  if len(parts) > 2 and parts[0] == "tmp" and parts[1].isdigit() and parts[2].isdigit():
-                     end = int(parts[2]) / 1000.0
+                     end = int(parts[2])
 
                      if end and time.time() > end:
                          path.remove(recursive=True)
-         except Exception as e:
-             if raise_error:
-                 raise e
-             LOGGER.warning(e)
 
          LOGGER.info(
              "Cleaned temp path %s",
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/libs/databrickslib.py
@@ -1,5 +1,5 @@
  """Optional Databricks SDK dependency helpers."""
- from yggdrasil.types.dummy_class import DummyModuleClass
+ from ..types.dummy_class import DummyModuleClass
 
 
  class DatabricksDummyClass(DummyModuleClass):
@@ -25,7 +25,7 @@ def require_databricks_sdk():
 
  try:
      import databricks
-     import databricks.sdk  # type: ignore
+     import databricks.sdk
 
      from databricks.sdk import WorkspaceClient
 
@@ -34,7 +34,6 @@ try:
  except ImportError:
      databricks = DatabricksDummyClass
      databricks_sdk = DatabricksDummyClass
-
      WorkspaceClient = DatabricksDummyClass
 
 
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/modules.py
@@ -42,7 +42,7 @@ MODULE_PROJECT_NAMES_ALIASES = {
      "yggdrasil": "ygg",
      "jwt": "PyJWT",
  }
-
+ DEFAULT_PIP_INDEX_SETTINGS = None
 
  def module_name_to_project_name(module_name: str) -> str:
      """Map module import names to PyPI project names when they differ.
@@ -264,6 +264,11 @@ class PipIndexSettings:
          Returns:
              Default PipIndexSettings instance.
          """
+         global DEFAULT_PIP_INDEX_SETTINGS
+
+         if DEFAULT_PIP_INDEX_SETTINGS is None:
+             DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
+
          return DEFAULT_PIP_INDEX_SETTINGS
 
      @property
@@ -363,9 +368,3 @@ def get_pip_index_settings() -> PipIndexSettings:
              extra_index_urls.append(u)
 
      return PipIndexSettings(index_url=index_url, extra_index_urls=extra_index_urls, sources=sources)
-
-
- try:
-     DEFAULT_PIP_INDEX_SETTINGS = get_pip_index_settings()
- except:
-     DEFAULT_PIP_INDEX_SETTINGS = PipIndexSettings()
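
Illustrative note (not part of the diff): modules.py replaces the import-time DEFAULT_PIP_INDEX_SETTINGS computation with a lazily initialized module-level singleton. A generic sketch of that pattern with hypothetical names:

    from typing import Optional

    _DEFAULT_SETTINGS: Optional[dict] = None  # populated on first access

    def _load_settings() -> dict:
        # Stand-in for the real (possibly expensive) discovery step.
        return {"index_url": None, "extra_index_urls": []}

    def get_default_settings() -> dict:
        global _DEFAULT_SETTINGS
        if _DEFAULT_SETTINGS is None:
            _DEFAULT_SETTINGS = _load_settings()
        return _DEFAULT_SETTINGS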
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/pyutils/python_env.py
@@ -27,7 +27,6 @@ log = logging.getLogger(__name__)
 
  class PythonEnvError(RuntimeError):
      """Raised when Python environment operations fail."""
-
      pass
 
 
@@ -72,6 +71,9 @@ _NON_PIPABLE_RE = re.compile(
      re.IGNORECASE,
  )
 
+ # Snapshot singleton (import-time)
+ CURRENT_PYTHON_ENV: "PythonEnv" = None
+
 
 
  def _filter_non_pipable_linux_packages(requirements: Iterable[str]) -> List[str]:
@@ -1508,11 +1510,3 @@ print("RESULT:" + json.dumps(top_level))""".strip()
              log.error("python_env CLI error: %s", e)
              print(f"ERROR: {e}", file=sys.stderr)
              return 2
-
-
- # Snapshot singleton (import-time)
- CURRENT_PYTHON_ENV: PythonEnv = None
-
-
- if __name__ == "__main__":
-     raise SystemExit(PythonEnv.cli())
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/requests/msal.py
@@ -3,12 +3,8 @@
  # auth_session.py
  import os
  import time
- from typing import Any, Mapping, Optional
-
- import urllib3
-
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
- from dataclasses import dataclass
+ from dataclasses import dataclass, field
+ from typing import Any, Optional
 
  from .session import YGGSession
 
@@ -38,11 +34,11 @@ class MSALAuth:
          authority: Optional authority URL override.
          scopes: List of scopes to request.
      """
-     tenant_id: Optional[str] = None
-     client_id: Optional[str] = None
-     client_secret: Optional[str] = None
-     authority: Optional[str] = None
-     scopes: list[str] | None = None
+     tenant_id: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_TENANT_ID"))
+     client_id: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_CLIENT_ID"))
+     client_secret: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_CLIENT_SECRET"))
+     authority: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_AUTHORITY"))
+     scopes: list[str] | None = field(default_factory=lambda: os.environ.get("AZURE_SCOPES"))
 
      _auth_app: ConfidentialClientApplication | None = None
      _expires_at: float | None = None
@@ -77,97 +73,15 @@
          Returns:
              None.
          """
-         self.tenant_id = self.tenant_id or os.environ.get("AZURE_TENANT_ID")
-         self.client_id = self.client_id or os.environ.get("AZURE_CLIENT_ID")
-         self.client_secret = self.client_secret or os.environ.get("AZURE_CLIENT_SECRET")
-
-         self.authority = self.authority or os.environ.get("AZURE_AUTHORITY")
          if not self.authority:
+             assert self.tenant_id, "tenant_id is required to build authority URL"
+
              self.authority = f"https://login.microsoftonline.com/{self.tenant_id}"
 
-         self.scopes = self.scopes or os.environ.get("AZURE_SCOPES")
          if self.scopes:
              if isinstance(self.scopes, str):
                  self.scopes = self.scopes.split(",")
 
-         self._validate_config()
-
-     def _validate_config(self):
-         """Validate that all required configuration is present.
-
-         Returns:
-             None.
-         """
-         missing = []
-
-         if not self.client_id:
-             missing.append("azure_client_id (AZURE_CLIENT_ID)")
-         if not self.client_secret:
-             missing.append("azure_client_secret (AZURE_CLIENT_SECRET)")
-         if not self.tenant_id:
-             missing.append("azure_client_secret (AZURE_TENANT_ID)")
-         if not self.scopes:
-             missing.append("scopes (AZURE_SCOPES)")
-
-         if missing:
-             raise ValueError(f"Missing required configuration: {', '.join(missing)}")
-
-     @classmethod
-     def find_in_env(
-         cls,
-         env: Mapping = None,
-         prefix: Optional[str] = None
-     ) -> "MSALAuth":
-         """Return an MSALAuth built from environment variables if available.
-
-         Args:
-             env: Mapping to read variables from; defaults to os.environ.
-             prefix: Optional prefix for variable names.
-
-         Returns:
-             A configured MSALAuth instance or None.
-         """
-         if not env:
-             env = os.environ
-         prefix = prefix or "AZURE_"
-
-         required = {
-             key: env.get(prefix + key.upper())
-             for key in (
-                 "client_id", "client_secret", "tenant_id", "scopes"
-             )
-         }
-
-         if all(required.values()):
-             scopes = required["scopes"].split(",") if required["scopes"] else None
-             return MSALAuth(
-                 tenant_id=required["tenant_id"],
-                 client_id=required["client_id"],
-                 client_secret=required["client_secret"],
-                 scopes=scopes,
-                 authority=env.get(prefix + "AUTHORITY"),
-             )
-
-         return None
-
-     def export_to(self, to: dict = os.environ):
-         """Export the auth configuration to the provided mapping.
-
-         Args:
-             to: Mapping to populate with auth configuration values.
-
-         Returns:
-             None.
-         """
-         for key, value in (
-             ("AZURE_CLIENT_ID", self.client_id),
-             ("AZURE_CLIENT_SECRET", self.client_secret),
-             ("AZURE_AUTHORITY", self.authority),
-             ("AZURE_SCOPES", ",".join(self.scopes)),
-         ):
-             if value:
-                 to[key] = value
-
 
      @property
      def auth_app(self) -> ConfidentialClientApplication:
@@ -298,7 +212,6 @@ class MSALSession(YGGSession):
          super().__init__(*args, **kwargs)
          self.msal_auth = msal_auth
 
-
      def prepare_request(self, request):
          """Prepare the request with an Authorization header when needed.
 
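
Illustrative note (not part of the diff): the MSALAuth fields now read their defaults from environment variables through dataclasses.field(default_factory=...), so the values are captured when an instance is created rather than when the class is defined. A minimal standalone sketch of that idiom:

    import os
    from dataclasses import dataclass, field
    from typing import Optional

    @dataclass
    class AzureConfig:
        tenant_id: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_TENANT_ID"))
        client_id: Optional[str] = field(default_factory=lambda: os.environ.get("AZURE_CLIENT_ID"))

    # Values reflect the environment at construction time, e.g.:
    # os.environ["AZURE_TENANT_ID"] = "example"; AzureConfig().tenant_id == "example"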
ygg-0.1.64/src/yggdrasil/types/file_format.py (new file)
@@ -0,0 +1,14 @@
+ from pyarrow.dataset import FileFormat, ParquetFileFormat, CsvFileFormat, JsonFileFormat
+
+
+ __all__ = [
+     "FileFormat",
+     "ExcelFileFormat",
+     "ParquetFileFormat",
+     "CsvFileFormat",
+     "JsonFileFormat"
+ ]
+
+
+ class ExcelFileFormat(FileFormat):
+     pass
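
Illustrative note (not part of the diff): the new yggdrasil.types.file_format module re-exports the PyArrow dataset file formats alongside the ExcelFileFormat marker class, so callers can import every format from one place. Assumed usage, based only on the file shown above:

    from yggdrasil.types.file_format import (
        CsvFileFormat,
        ExcelFileFormat,
        ParquetFileFormat,
    )

    fmt = ParquetFileFormat()
    print(isinstance(fmt, ExcelFileFormat))  # False: Excel is a separate marker subclass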
{ygg-0.1.60 → ygg-0.1.64}/src/yggdrasil/types/python_defaults.py
@@ -18,84 +18,96 @@ __all__ = [
      "default_arrow_array"
  ]
 
+ DEFAULT_MAPS_INITIALIZED = False
 
  _NONE_TYPE = type(None)
- _PRIMITIVE_DEFAULTS = {
-     str: "",
-     int: 0,
-     float: 0.0,
-     bool: False,
-     bytes: b"",
- }
-
- _SPECIAL_DEFAULTS = {
-     datetime.datetime: lambda: datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
-     datetime.date: lambda: datetime.date(1970, 1, 1),
-     datetime.time: lambda: datetime.time(0, 0, 0, tzinfo=datetime.timezone.utc),
-     datetime.timedelta: lambda: datetime.timedelta(0),
-     uuid.UUID: lambda: uuid.UUID(int=0),
-     decimal.Decimal: lambda: decimal.Decimal(0),
- }
-
- _ARROW_DEFAULTS = {
-     pa.null(): pa.scalar(None, type=pa.null()),
-
-     pa.bool_(): pa.scalar(False, type=pa.bool_()),
-
-     pa.int8(): pa.scalar(0, type=pa.int8()),
-     pa.int16(): pa.scalar(0, type=pa.int16()),
-     pa.int32(): pa.scalar(0, type=pa.int32()),
-     pa.int64(): pa.scalar(0, type=pa.int64()),
-
-     pa.uint8(): pa.scalar(0, type=pa.uint8()),
-     pa.uint16(): pa.scalar(0, type=pa.uint16()),
-     pa.uint32(): pa.scalar(0, type=pa.uint32()),
-     pa.uint64(): pa.scalar(0, type=pa.uint64()),
-
-     # pa.float16(): pa.scalar(0.0, type=pa.float16()),
-     pa.float32(): pa.scalar(0.0, type=pa.float32()),
-     pa.float64(): pa.scalar(0.0, type=pa.float64()),
-
-     pa.string(): pa.scalar("", type=pa.string()),
-     pa.string_view(): pa.scalar("", type=pa.string_view()),
-     pa.large_string(): pa.scalar("", type=pa.large_string()),
-
-     pa.binary(): pa.scalar(b"", type=pa.binary()),
-     pa.binary_view(): pa.scalar(b"", type=pa.binary_view()),
-     pa.large_binary(): pa.scalar(b"", type=pa.large_binary()),
- }
-
-
- try:
-     import polars
-
-     polars = polars
-
-     _POLARS_DEFAULTS = {
-         polars.Null(): None,
-         polars.Boolean(): False,
-
-         polars.Binary(): b"",
-
-         polars.Utf8(): "",
-
-         polars.Int8(): 0,
-         polars.Int16(): 0,
-         polars.Int32(): 0,
-         polars.Int64(): 0,
-
-         polars.UInt8(): 0,
-         polars.UInt16(): 0,
-         polars.UInt32(): 0,
-         polars.UInt64(): 0,
-
-         polars.Float32(): 0.0,
-         polars.Float64(): 0.0,
-     }
- except ImportError:
-     polars = None
-
-     _POLARS_DEFAULTS = {}
+ _ARROW_DEFAULTS = {}
+ _POLARS_DEFAULTS = {}
+ _PRIMITIVE_DEFAULTS = {}
+ _SPECIAL_DEFAULTS = {}
+
+
+ def ensure_default_maps_initialized():
+     global DEFAULT_MAPS_INITIALIZED
+     global _PRIMITIVE_DEFAULTS
+     global _SPECIAL_DEFAULTS
+     global _ARROW_DEFAULTS
+     global _POLARS_DEFAULTS
+
+     if not DEFAULT_MAPS_INITIALIZED:
+         _PRIMITIVE_DEFAULTS = {
+             str: "",
+             int: 0,
+             float: 0.0,
+             bool: False,
+             bytes: b"",
+         }
+
+         _SPECIAL_DEFAULTS = {
+             datetime.datetime: lambda: datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc),
+             datetime.date: lambda: datetime.date(1970, 1, 1),
+             datetime.time: lambda: datetime.time(0, 0, 0, tzinfo=datetime.timezone.utc),
+             datetime.timedelta: lambda: datetime.timedelta(0),
+             uuid.UUID: lambda: uuid.UUID(int=0),
+             decimal.Decimal: lambda: decimal.Decimal(0),
+         }
+
+         _ARROW_DEFAULTS = {
+             pa.null(): pa.scalar(None, type=pa.null()),
+
+             pa.bool_(): pa.scalar(False, type=pa.bool_()),
+
+             pa.int8(): pa.scalar(0, type=pa.int8()),
+             pa.int16(): pa.scalar(0, type=pa.int16()),
+             pa.int32(): pa.scalar(0, type=pa.int32()),
+             pa.int64(): pa.scalar(0, type=pa.int64()),
+
+             pa.uint8(): pa.scalar(0, type=pa.uint8()),
+             pa.uint16(): pa.scalar(0, type=pa.uint16()),
+             pa.uint32(): pa.scalar(0, type=pa.uint32()),
+             pa.uint64(): pa.scalar(0, type=pa.uint64()),
+
+             # pa.float16(): pa.scalar(0.0, type=pa.float16()),
+             pa.float32(): pa.scalar(0.0, type=pa.float32()),
+             pa.float64(): pa.scalar(0.0, type=pa.float64()),
+
+             pa.string(): pa.scalar("", type=pa.string()),
+             pa.string_view(): pa.scalar("", type=pa.string_view()),
+             pa.large_string(): pa.scalar("", type=pa.large_string()),
+
+             pa.binary(): pa.scalar(b"", type=pa.binary()),
+             pa.binary_view(): pa.scalar(b"", type=pa.binary_view()),
+             pa.large_binary(): pa.scalar(b"", type=pa.large_binary()),
+         }
+
+         try:
+             import polars
+
+             _POLARS_DEFAULTS = {
+                 polars.Null(): None,
+                 polars.Boolean(): False,
+
+                 polars.Binary(): b"",
+
+                 polars.Utf8(): "",
+
+                 polars.Int8(): 0,
+                 polars.Int16(): 0,
+                 polars.Int32(): 0,
+                 polars.Int64(): 0,
+
+                 polars.UInt8(): 0,
+                 polars.UInt16(): 0,
+                 polars.UInt32(): 0,
+                 polars.UInt64(): 0,
+
+                 polars.Float32(): 0.0,
+                 polars.Float64(): 0.0,
+             }
+         except ImportError:
+             pass
+
+         DEFAULT_MAPS_INITIALIZED = True
 
  def _is_optional(hint) -> bool:
      """Return True when the type hint is Optional.
@@ -199,6 +211,8 @@ def default_arrow_scalar(
      Returns:
          Arrow scalar default.
      """
+     ensure_default_maps_initialized()
+
      if nullable:
          return pa.scalar(None, type=dtype)
 
@@ -307,6 +321,8 @@ def default_python_scalar(hint: Any):
      if _is_optional(hint):
          return None
 
+     ensure_default_maps_initialized()
+
      if hint in _PRIMITIVE_DEFAULTS:
          return _PRIMITIVE_DEFAULTS[hint]
 
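
Illustrative note (not part of the diff): python_defaults.py now builds its default-value maps on first use via ensure_default_maps_initialized() instead of at import time, which keeps importing the module cheap and defers the optional polars import. A generic sketch of the same lazy-table idiom with hypothetical names:

    _DEFAULTS: dict = {}
    _INITIALIZED = False

    def _ensure_initialized() -> None:
        global _INITIALIZED, _DEFAULTS
        if not _INITIALIZED:
            _DEFAULTS = {str: "", int: 0, float: 0.0, bool: False, bytes: b""}
            _INITIALIZED = True

    def default_for(py_type):
        _ensure_initialized()
        return _DEFAULTS.get(py_type)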
ygg-0.1.64/src/yggdrasil/version.py (new file)
@@ -0,0 +1 @@
+ __version__ = "0.1.64"
ygg-0.1.60/src/yggdrasil/types/file_format.py (removed)
@@ -1,10 +0,0 @@
- from pyarrow.dataset import FileFormat
-
-
- __all__ = [
-     "ExcelFileFormat"
- ]
-
-
- class ExcelFileFormat(FileFormat):
-     pass
ygg-0.1.60/src/yggdrasil/version.py (removed)
@@ -1 +0,0 @@
- __version__ = "0.1.60"