ygg 0.1.31__py3-none-any.whl → 0.1.32__py3-none-any.whl

This diff shows the changes between two package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (61)
  1. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/METADATA +1 -1
  2. ygg-0.1.32.dist-info/RECORD +60 -0
  3. yggdrasil/__init__.py +2 -0
  4. yggdrasil/databricks/__init__.py +2 -0
  5. yggdrasil/databricks/compute/__init__.py +2 -0
  6. yggdrasil/databricks/compute/cluster.py +241 -2
  7. yggdrasil/databricks/compute/execution_context.py +100 -11
  8. yggdrasil/databricks/compute/remote.py +16 -0
  9. yggdrasil/databricks/jobs/__init__.py +5 -0
  10. yggdrasil/databricks/jobs/config.py +29 -4
  11. yggdrasil/databricks/sql/__init__.py +2 -0
  12. yggdrasil/databricks/sql/engine.py +217 -36
  13. yggdrasil/databricks/sql/exceptions.py +1 -0
  14. yggdrasil/databricks/sql/statement_result.py +147 -0
  15. yggdrasil/databricks/sql/types.py +33 -1
  16. yggdrasil/databricks/workspaces/__init__.py +2 -1
  17. yggdrasil/databricks/workspaces/filesytem.py +183 -0
  18. yggdrasil/databricks/workspaces/io.py +387 -9
  19. yggdrasil/databricks/workspaces/path.py +297 -2
  20. yggdrasil/databricks/workspaces/path_kind.py +3 -0
  21. yggdrasil/databricks/workspaces/workspace.py +202 -5
  22. yggdrasil/dataclasses/__init__.py +2 -0
  23. yggdrasil/dataclasses/dataclass.py +42 -1
  24. yggdrasil/libs/__init__.py +2 -0
  25. yggdrasil/libs/databrickslib.py +9 -0
  26. yggdrasil/libs/extensions/__init__.py +2 -0
  27. yggdrasil/libs/extensions/polars_extensions.py +72 -0
  28. yggdrasil/libs/extensions/spark_extensions.py +116 -0
  29. yggdrasil/libs/pandaslib.py +7 -0
  30. yggdrasil/libs/polarslib.py +7 -0
  31. yggdrasil/libs/sparklib.py +41 -0
  32. yggdrasil/pyutils/__init__.py +4 -0
  33. yggdrasil/pyutils/callable_serde.py +106 -0
  34. yggdrasil/pyutils/exceptions.py +16 -0
  35. yggdrasil/pyutils/modules.py +44 -1
  36. yggdrasil/pyutils/parallel.py +29 -0
  37. yggdrasil/pyutils/python_env.py +301 -0
  38. yggdrasil/pyutils/retry.py +57 -0
  39. yggdrasil/requests/__init__.py +4 -0
  40. yggdrasil/requests/msal.py +124 -3
  41. yggdrasil/requests/session.py +18 -0
  42. yggdrasil/types/__init__.py +2 -0
  43. yggdrasil/types/cast/__init__.py +2 -1
  44. yggdrasil/types/cast/arrow_cast.py +123 -1
  45. yggdrasil/types/cast/cast_options.py +119 -1
  46. yggdrasil/types/cast/pandas_cast.py +29 -0
  47. yggdrasil/types/cast/polars_cast.py +47 -0
  48. yggdrasil/types/cast/polars_pandas_cast.py +29 -0
  49. yggdrasil/types/cast/registry.py +176 -0
  50. yggdrasil/types/cast/spark_cast.py +76 -0
  51. yggdrasil/types/cast/spark_pandas_cast.py +29 -0
  52. yggdrasil/types/cast/spark_polars_cast.py +28 -0
  53. yggdrasil/types/libs.py +2 -0
  54. yggdrasil/types/python_arrow.py +191 -0
  55. yggdrasil/types/python_defaults.py +73 -0
  56. yggdrasil/version.py +1 -0
  57. ygg-0.1.31.dist-info/RECORD +0 -59
  58. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/WHEEL +0 -0
  59. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/entry_points.txt +0 -0
  60. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/licenses/LICENSE +0 -0
  61. {ygg-0.1.31.dist-info → ygg-0.1.32.dist-info}/top_level.txt +0 -0
yggdrasil/databricks/workspaces/workspace.py

@@ -1,3 +1,5 @@
+ """Workspace configuration and Databricks SDK helpers."""
+
  import dataclasses
  import logging
  import os
@@ -17,13 +19,13 @@ if TYPE_CHECKING:
  from ..compute.cluster import Cluster

  from .path import DatabricksPath, DatabricksPathKind
+ from ...version import __version__ as YGGDRASIL_VERSION
  from ...libs.databrickslib import require_databricks_sdk, databricks_sdk

  if databricks_sdk is not None:
  from databricks.sdk import WorkspaceClient
- from databricks.sdk.errors import ResourceDoesNotExist, NotFound
+ from databricks.sdk.errors import ResourceDoesNotExist
  from databricks.sdk.service.workspace import ExportFormat, ObjectInfo
- from databricks.sdk.service import catalog as catalog_svc
  from databricks.sdk.dbutils import FileInfo
  from databricks.sdk.service.files import DirectoryEntry

@@ -45,7 +47,7 @@ def _get_env_product():
  v = os.getenv("DATABRICKS_PRODUCT")

  if not v:
- return None
+ return "yggdrasil"
  return v.strip().lower()


@@ -53,7 +55,7 @@ def _get_env_product_version():
  v = os.getenv("DATABRICKS_PRODUCT_VERSION")

  if not v:
- return None
+ return YGGDRASIL_VERSION
  return v.strip().lower()
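The two hunks above change the client-metadata fallbacks: when DATABRICKS_PRODUCT or DATABRICKS_PRODUCT_VERSION is unset, the helpers now report "yggdrasil" and the installed package version instead of None. A condensed sketch of the resulting behavior (the function body is taken from the hunk above; the calls below are illustrative):

    import os

    def _get_env_product():
        v = os.getenv("DATABRICKS_PRODUCT")
        if not v:
            return "yggdrasil"  # was: return None
        return v.strip().lower()

    os.environ.pop("DATABRICKS_PRODUCT", None)
    print(_get_env_product())   # -> "yggdrasil"

    os.environ["DATABRICKS_PRODUCT"] = "My-App"
    print(_get_env_product())   # -> "my-app"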

@@ -67,6 +69,7 @@ def _get_env_product_tag():

  @dataclass
  class Workspace:
+ """Configuration wrapper for connecting to a Databricks workspace."""
  # Databricks / generic
  host: Optional[str] = None
  account_id: Optional[str] = None
@@ -113,6 +116,11 @@ class Workspace:
  # Pickle support
  # -------------------------
  def __getstate__(self):
+ """Serialize the workspace state for pickling.
+
+ Returns:
+ A pickle-ready state dictionary.
+ """
  state = self.__dict__.copy()
  state.pop("_sdk", None)

@@ -122,6 +130,11 @@ class Workspace:
  return state

  def __setstate__(self, state):
+ """Restore workspace state after unpickling.
+
+ Args:
+ state: Serialized state dictionary.
+ """
  self.__dict__.update(state)
  self._sdk = None

@@ -132,10 +145,25 @@ class Workspace:
  self.connect(reset=True)

  def __enter__(self) -> "Workspace":
+ """Enter a context manager and connect to the workspace.
+
+ Returns:
+ The connected Workspace instance.
+ """
  self._was_connected = self._sdk is not None
  return self.connect()

  def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+ """Exit the context manager and close if newly connected.
+
+ Args:
+ exc_type: Exception type, if raised.
+ exc_val: Exception value, if raised.
+ exc_tb: Exception traceback, if raised.
+
+ Returns:
+ None.
+ """
  if not self._was_connected:
  self.close()
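Per the docstrings added above, the context manager records whether a client was already cached on entry and only closes the connection on exit when this block created it. A minimal usage sketch, assuming Workspace is importable from yggdrasil.databricks.workspaces and the host value is hypothetical:

    from yggdrasil.databricks.workspaces import Workspace

    with Workspace(host="https://example.cloud.databricks.com") as ws:
        # __enter__ connected for us; __exit__ will close because
        # no connection existed before this block.
        print(ws.current_user.user_name)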

@@ -149,6 +177,14 @@ class Workspace:
  self,
  **kwargs
  ) -> "Workspace":
+ """Clone the workspace config with overrides.
+
+ Args:
+ **kwargs: Field overrides for the clone.
+
+ Returns:
+ A new Workspace instance with updated fields.
+ """
  state = self.__getstate__()
  state.update(kwargs)
  return Workspace().__setstate__(state)
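clone() round-trips the pickle state through __getstate__/__setstate__ and applies keyword overrides, so a derived configuration never mutates the original. Sketch (field names as declared on the dataclass above; host values hypothetical):

    base = Workspace(host="https://example.cloud.databricks.com")
    mirror = base.clone(host="https://mirror.cloud.databricks.com")

    assert base.host != mirror.host   # independent copies
    assert not mirror.connected       # the cached _sdk client is never carried over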
@@ -158,9 +194,23 @@ class Workspace:
  # -------------------------
  @property
  def connected(self):
+ """Return True when a WorkspaceClient is cached.
+
+ Returns:
+ True if connected, otherwise False.
+ """
  return self._sdk is not None

  def connect(self, reset: bool = False, clone: bool = False) -> "Workspace":
+ """Connect to the workspace and cache the SDK client.
+
+ Args:
+ reset: Whether to reset the cached client before connecting.
+ clone: Whether to connect a cloned instance.
+
+ Returns:
+ The connected Workspace instance.
+ """
  if reset:
  self._sdk = None

@@ -270,6 +320,11 @@ class Workspace:
  return str(files[0]) if files else None

  def reset_local_cache(self):
+ """Remove cached browser OAuth tokens.
+
+ Returns:
+ None.
+ """
  local_cache = self._local_cache_token_path()

  if local_cache:
@@ -277,6 +332,11 @@ class Workspace:

  @property
  def current_user(self):
+ """Return the current Databricks user.
+
+ Returns:
+ The current user object from the SDK.
+ """
  try:
  return self.sdk().current_user.me()
  except:
@@ -285,6 +345,11 @@ class Workspace:
  raise

  def current_token(self) -> str:
+ """Return the active API token for this workspace.
+
+ Returns:
+ The bearer token string.
+ """
  if self.token:
  return self.token

@@ -301,6 +366,14 @@ class Workspace:
  self,
  workspace: Optional["Workspace"] = None,
  ):
+ """Return a PyArrow filesystem for Databricks paths.
+
+ Args:
+ workspace: Optional workspace override.
+
+ Returns:
+ A DatabricksFileSystem instance.
+ """
  from .filesytem import DatabricksFileSystem, DatabricksFileSystemHandler

  handler = DatabricksFileSystemHandler(
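filesystem() wires a DatabricksFileSystemHandler into a DatabricksFileSystem for this workspace. A hedged sketch of how such a filesystem is typically consumed, assuming the returned object satisfies the pyarrow.fs interface as the docstring suggests (the volume path and format below are illustrative):

    import pyarrow.dataset as ds

    ws = Workspace().connect()
    fs = ws.filesystem()

    dataset = ds.dataset(
        "Volumes/main/default/landing/events",  # hypothetical Unity Catalog volume
        format="parquet",
        filesystem=fs,
    )
    preview = dataset.head(10)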
@@ -317,6 +390,16 @@ class Workspace:
  kind: Optional[DatabricksPathKind] = None,
  workspace: Optional["Workspace"] = None
  ):
+ """Create a DatabricksPath in this workspace.
+
+ Args:
+ parts: Path parts or string to parse.
+ kind: Optional path kind override.
+ workspace: Optional workspace override.
+
+ Returns:
+ A DatabricksPath instance.
+ """
  workspace = self if workspace is None else workspace

  if kind is None or isinstance(parts, str):
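dbfs_path() accepts either a raw string or pre-split parts and infers the DatabricksPathKind from string input when no kind is passed. Illustrative sketch (the paths are hypothetical):

    ws = Workspace().connect()

    notebook = ws.dbfs_path("/Workspace/Shared/reports/daily")
    raw_file = ws.dbfs_path("/Volumes/main/default/raw/events.csv")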
@@ -337,6 +420,12 @@ class Workspace:
  ) -> DatabricksPath:
  """
  Shared cache base under Volumes for the current user.
+
+ Args:
+ suffix: Optional path suffix to append.
+
+ Returns:
+ A DatabricksPath pointing at the shared cache location.
  """
  base = "/Workspace/Shared/.ygg/cache"

@@ -351,6 +440,11 @@ class Workspace:
  # ------------------------------------------------------------------ #

  def sdk(self) -> "WorkspaceClient":
+ """Return the connected WorkspaceClient.
+
+ Returns:
+ The WorkspaceClient instance.
+ """
  return self.connect()._sdk

  # ------------------------------------------------------------------ #
@@ -370,6 +464,13 @@ class Workspace:
  - other paths -> Workspace paths (sdk.workspace.list)

  If recursive=True, yield all nested files/directories.
+
+ Args:
+ path: Path string to list.
+ recursive: Whether to list recursively.
+
+ Returns:
+ An iterator of workspace/DBFS/volume entries.
  """
  sdk = self.sdk()

@@ -422,6 +523,13 @@ class Workspace:
  via workspace.download(...).

  Returned object is a BinaryIO context manager.
+
+ Args:
+ path: Path to open.
+ workspace_format: Optional export format for workspace paths.
+
+ Returns:
+ A BinaryIO stream for reading.
  """
  sdk = self.sdk()

@@ -437,9 +545,19 @@ class Workspace:

  @staticmethod
  def is_in_databricks_environment():
+ """Return True when running on a Databricks runtime.
+
+ Returns:
+ True if running on Databricks, otherwise False.
+ """
  return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None

  def default_tags(self):
+ """Return default resource tags for Databricks assets.
+
+ Returns:
+ A dict of default tags.
+ """
  return {
  k: v
  for k, v in (
@@ -451,6 +569,14 @@ class Workspace:
  }

  def merge_tags(self, existing: dict | None = None):
+ """Merge default tags with an existing set.
+
+ Args:
+ existing: Optional existing tags.
+
+ Returns:
+ A dict of merged tags.
+ """
  if existing:
  return self.default_tags()

@@ -461,6 +587,17 @@ class Workspace:
  schema_name: Optional[str] = None,
  **kwargs
  ):
+ """Return a SQLEngine configured for this workspace.
+
+ Args:
+ workspace: Optional workspace override.
+ catalog_name: Optional catalog name.
+ schema_name: Optional schema name.
+ **kwargs: Additional SQLEngine parameters.
+
+ Returns:
+ A SQLEngine instance.
+ """
  from ..sql import SQLEngine

  return SQLEngine(
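sql_engine() forwards the workspace plus optional catalog/schema pins into a SQLEngine; any additional keyword arguments go straight to the SQLEngine constructor. Minimal sketch (the catalog and schema names are hypothetical; the SQLEngine query API is not shown in this hunk):

    ws = Workspace().connect()
    engine = ws.sql_engine(catalog_name="main", schema_name="analytics")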
@@ -476,11 +613,20 @@ class Workspace:
  cluster_name: Optional[str] = None,
  **kwargs
  ) -> "Cluster":
+ """Return a Cluster helper bound to this workspace.
+
+ Args:
+ cluster_id: Optional cluster id.
+ cluster_name: Optional cluster name.
+ **kwargs: Additional Cluster parameters.
+
+ Returns:
+ A Cluster instance.
+ """
  from ..compute.cluster import Cluster

  return Cluster(workspace=self, cluster_id=cluster_id, cluster_name=cluster_name, **kwargs)

-
  # ---------------------------------------------------------------------------
  # Workspace-bound base class
  # ---------------------------------------------------------------------------
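cluster() is the analogous factory for compute: it binds this workspace into a Cluster helper resolved by id or by name. Sketch (the cluster name and id are hypothetical):

    cluster = ws.cluster(cluster_name="shared-autoscaling")
    # or: ws.cluster(cluster_id="0123-456789-abcdefgh")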
@@ -490,23 +636,54 @@ DBXWorkspace = Workspace

  @dataclass
  class WorkspaceService(ABC):
+ """Base class for helpers that depend on a Workspace."""
  workspace: Workspace = dataclasses.field(default_factory=Workspace)

  def __post_init__(self):
+ """Ensure a Workspace instance is available.
+
+ Returns:
+ None.
+ """
  if self.workspace is None:
  self.workspace = Workspace()

  def __enter__(self):
+ """Enter a context manager and connect the workspace.
+
+ Returns:
+ The current WorkspaceService instance.
+ """
  self.workspace.__enter__()
  return self

  def __exit__(self, exc_type, exc_val, exc_tb):
+ """Exit the context manager and close the workspace.
+
+ Args:
+ exc_type: Exception type, if raised.
+ exc_val: Exception value, if raised.
+ exc_tb: Exception traceback, if raised.
+
+ Returns:
+ None.
+ """
  self.workspace.__exit__(exc_type=exc_type, exc_val=exc_val, exc_tb=exc_tb)

  def is_in_databricks_environment(self):
+ """Return True when running on a Databricks runtime.
+
+ Returns:
+ True if running on Databricks, otherwise False.
+ """
  return self.workspace.is_in_databricks_environment()

  def connect(self):
+ """Connect the underlying workspace.
+
+ Returns:
+ The current WorkspaceService instance.
+ """
  self.workspace = self.workspace.connect()
  return self

@@ -516,6 +693,16 @@ class WorkspaceService(ABC):
  kind: Optional[DatabricksPathKind] = None,
  workspace: Optional["Workspace"] = None
  ):
+ """Create a DatabricksPath in the underlying workspace.
+
+ Args:
+ parts: Path parts or string to parse.
+ kind: Optional path kind override.
+ workspace: Optional workspace override.
+
+ Returns:
+ A DatabricksPath instance.
+ """
  return self.workspace.dbfs_path(
  kind=kind,
  parts=parts,
@@ -523,8 +710,18 @@ class WorkspaceService(ABC):
  )

  def sdk(self):
+ """Return the WorkspaceClient for the underlying workspace.
+
+ Returns:
+ The WorkspaceClient instance.
+ """
  return self.workspace.sdk()

  @property
  def current_user(self):
+ """Return the current Databricks user.
+
+ Returns:
+ The current user object from the SDK.
+ """
  return self.workspace.current_user
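WorkspaceService gives subclasses the workspace field, context-manager wiring, and the sdk()/current_user/dbfs_path proxies shown above, so a concrete service only adds its own behavior. A hedged subclass sketch, assuming WorkspaceService is importable next to Workspace (the service name and method are hypothetical):

    import dataclasses

    @dataclasses.dataclass
    class WhoAmIService(WorkspaceService):
        """Hypothetical service built on the proxies above."""

        def whoami(self) -> str:
            return self.current_user.user_name

    with WhoAmIService(workspace=Workspace()) as svc:
        print(svc.whoami())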
yggdrasil/dataclasses/__init__.py

@@ -1,3 +1,5 @@
+ """Enhanced dataclass helpers with Arrow awareness."""
+
  from .dataclass import yggdataclass

  __all__ = ["yggdataclass"]
yggdrasil/dataclasses/dataclass.py

@@ -1,3 +1,5 @@
+ """Dataclass helpers that integrate with Arrow schemas and safe casting."""
+
  import dataclasses
  from inspect import isclass
  from typing import Any, Iterable, Mapping, Tuple
@@ -18,6 +20,7 @@ def is_yggdataclass(cls_or_instance: Any) -> bool:

  Args:
  cls_or_instance: The class or instance to check.
+
  Returns:
  True if the class or instance
  is a yggdrasil dataclass, False otherwise.
@@ -26,6 +29,14 @@ def is_yggdataclass(cls_or_instance: Any) -> bool:


  def get_dataclass_arrow_field(cls_or_instance: Any) -> pa.Field:
+ """Return a cached Arrow Field describing the dataclass type.
+
+ Args:
+ cls_or_instance: Dataclass class or instance.
+
+ Returns:
+ Arrow field describing the dataclass schema.
+ """
  if is_yggdataclass(cls_or_instance):
  return cls_or_instance.__arrow_field__()

@@ -58,7 +69,7 @@ def yggdataclass(
  kw_only=False, slots=False,
  weakref_slot=False
  ):
- """Add dunder methods based on the fields defined in the class.
+ """Decorate a class with dataclass behavior plus Arrow helpers.

  Examines PEP 526 __annotations__ to determine fields.

@@ -73,7 +84,24 @@ def yggdataclass(
  """

  def wrap(c):
+ """Wrap a class with yggdrasil dataclass enhancements.
+
+ Args:
+ c: Class to decorate.
+
+ Returns:
+ Decorated dataclass type.
+ """
+
  def _init_public_fields(cls):
+ """Return init-enabled, public dataclass fields.
+
+ Args:
+ cls: Dataclass type.
+
+ Returns:
+ List of dataclasses.Field objects.
+ """
  return [
  field
  for field in dataclasses.fields(cls)
@@ -83,6 +111,11 @@ def yggdataclass(
  if not hasattr(c, "default_instance"):
  @classmethod
  def default_instance(cls):
+ """Return a default instance built from type defaults.
+
+ Returns:
+ Default instance of the dataclass.
+ """
  from yggdrasil.types import default_scalar

  if not hasattr(cls, "__default_instance__"):
@@ -135,6 +168,14 @@ def yggdataclass(
  if not hasattr(c, "__arrow_field__"):
  @classmethod
  def __arrow_field__(cls, name: str | None = None):
+ """Return an Arrow field representing the dataclass schema.
+
+ Args:
+ name: Optional override for the field name.
+
+ Returns:
+ Arrow field describing the dataclass schema.
+ """
  from yggdrasil.types.python_arrow import arrow_field_from_hint

  return arrow_field_from_hint(cls, name=name)
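Taken together, these hunks mean a decorated class picks up default_instance() and __arrow_field__() classmethods derived from its type hints. A usage sketch, assuming yggdataclass accepts the same call forms as dataclasses.dataclass (the field names below are illustrative):

    from typing import Optional

    from yggdrasil.dataclasses import yggdataclass

    @yggdataclass()
    class Reading:
        sensor_id: str = ""
        value: float = 0.0
        unit: Optional[str] = None

    arrow_field = Reading.__arrow_field__()   # pa.Field built from the annotations
    fallback = Reading.default_instance()     # instance populated with type defaults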
yggdrasil/libs/__init__.py

@@ -1,3 +1,5 @@
+ """Helper utilities for optional dependency integrations."""
+
  from .sparklib import *
  from .polarslib import *
  from .pandaslib import *
yggdrasil/libs/databrickslib.py

@@ -1,3 +1,5 @@
+ """Optional Databricks SDK dependency helpers."""
+
  try:
  import databricks
  import databricks.sdk  # type: ignore
@@ -6,7 +8,9 @@ try:
  databricks_sdk = databricks.sdk
  except ImportError:
  class _DatabricksDummy:
+ """Placeholder object that raises if Databricks SDK is required."""
  def __getattr__(self, item):
+ """Raise an error when accessing missing Databricks SDK attributes."""
  require_databricks_sdk()

  databricks = _DatabricksDummy
@@ -14,6 +18,11 @@ except ImportError:


  def require_databricks_sdk():
+ """Ensure the Databricks SDK is available before use.
+
+ Returns:
+ None.
+ """
  if databricks_sdk is None:
  raise ImportError(
  "databricks_sdk is required to use this function. "
yggdrasil/libs/extensions/__init__.py

@@ -1,2 +1,4 @@
+ """Extensions for Spark and Polars helpers."""
+
  from .spark_extensions import *
  from .polars_extensions import *
yggdrasil/libs/extensions/polars_extensions.py

@@ -1,3 +1,5 @@
+ """Polars DataFrame extension helpers for joins and resampling."""
+
  from __future__ import annotations

  import datetime
@@ -39,6 +41,14 @@ def join_coalesced(


  def _normalize_group_by(group_by: str | Sequence[str] | None) -> list[str] | None:
+ """Normalize group_by inputs into a list or None.
+
+ Args:
+ group_by: Grouping column or columns.
+
+ Returns:
+ List of column names or None.
+ """
  if group_by is None:
  return None
  if isinstance(group_by, str):
@@ -57,6 +67,15 @@ def _filter_kwargs_for_callable(fn: object, kwargs: dict[str, Any]) -> dict[str,


  def _expr_from_agg(col: str, agg: Any) -> "pl.Expr":
+ """Build a Polars expression from an aggregation spec.
+
+ Args:
+ col: Column name to aggregate.
+ agg: Aggregation spec (expr, callable, or string).
+
+ Returns:
+ Polars expression.
+ """
  base = pl.col(col)

  if isinstance(agg, pl.Expr):
@@ -80,6 +99,14 @@ def _expr_from_agg(col: str, agg: Any) -> "pl.Expr":


  def _normalize_aggs(agg: AggSpec) -> list["pl.Expr"]:
+ """Normalize aggregation specs into a list of Polars expressions.
+
+ Args:
+ agg: Mapping or sequence of aggregation specs.
+
+ Returns:
+ List of Polars expressions.
+ """
  if isinstance(agg, Mapping):
  return [_expr_from_agg(col, spec) for col, spec in agg.items()]

@@ -91,11 +118,27 @@ def _normalize_aggs(agg: AggSpec) -> list["pl.Expr"]:


  def _is_datetime(dtype: object) -> bool:
+ """Return True when the dtype is a Polars datetime.
+
+ Args:
+ dtype: Polars dtype to inspect.
+
+ Returns:
+ True if dtype is Polars Datetime.
+ """
  # Datetime-only inference (per requirement), version-safe.
  return isinstance(dtype, pl.Datetime)


  def _infer_time_col(df: "pl.DataFrame") -> str:
+ """Infer the first datetime-like column name from a DataFrame.
+
+ Args:
+ df: Polars DataFrame to inspect.
+
+ Returns:
+ Column name of the first datetime field.
+ """
  # Find first Datetime column in schema order; ignore Date columns.
  for name, dtype in df.schema.items():
  if _is_datetime(dtype):
@@ -106,6 +149,15 @@ def _infer_time_col(df: "pl.DataFrame") -> str:


  def _ensure_datetime_like(df: "pl.DataFrame", time_col: str) -> "pl.DataFrame":
+ """Ensure a time column is cast to datetime for resampling.
+
+ Args:
+ df: Polars DataFrame.
+ time_col: Column name to validate.
+
+ Returns:
+ DataFrame with time column cast to datetime if needed.
+ """
  dtype = df.schema.get(time_col)
  if dtype is None:
  raise KeyError(f"resample: time_col '{time_col}' not found in DataFrame columns.")
@@ -151,6 +203,14 @@ def _timedelta_to_polars_duration(td: datetime.timedelta) -> str:


  def _normalize_duration(v: str | datetime.timedelta | None) -> str | None:
+ """Normalize duration inputs to a Polars duration string.
+
+ Args:
+ v: Duration string, timedelta, or None.
+
+ Returns:
+ Normalized duration string or None.
+ """
  if v is None:
  return None
  if isinstance(v, str):
@@ -168,6 +228,18 @@ def _upsample_single(
  offset: str | datetime.timedelta | None,
  keep_group_order: bool,
  ) -> "pl.DataFrame":
+ """Upsample a single DataFrame with normalized duration arguments.
+
+ Args:
+ df: Polars DataFrame to upsample.
+ time_col: Name of the time column.
+ every: Sampling interval.
+ offset: Optional offset interval.
+ keep_group_order: Preserve input order when grouping.
+
+ Returns:
+ Upsampled Polars DataFrame.
+ """
  df = df.sort(time_col)

  every_n = _normalize_duration(every)
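These helpers normalize user input (column lists, aggregation specs, string-or-timedelta durations) before handing off to Polars' own upsample/group_by machinery. The underlying Polars call that _upsample_single prepares for looks roughly like this; the duration literal and column names are illustrative, and the exact string produced by _timedelta_to_polars_duration is not shown in this hunk:

    import datetime
    import polars as pl

    df = pl.DataFrame(
        {
            "ts": [datetime.datetime(2024, 1, 1, 0, 0), datetime.datetime(2024, 1, 1, 2, 0)],
            "value": [1.0, 3.0],
        }
    )

    # Sort first (as _upsample_single does), then upsample on a duration string.
    out = (
        df.sort("ts")
        .upsample(time_column="ts", every="30m")
        .with_columns(pl.col("value").fill_null(strategy="forward"))
    )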