ygg 0.1.21__tar.gz → 0.1.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ygg-0.1.21 → ygg-0.1.24}/PKG-INFO +1 -1
  2. {ygg-0.1.21 → ygg-0.1.24}/pyproject.toml +1 -1
  3. {ygg-0.1.21 → ygg-0.1.24}/src/ygg.egg-info/PKG-INFO +1 -1
  4. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/compute/cluster.py +9 -0
  5. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/compute/execution_context.py +1 -1
  6. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/pyutils/python_env.py +72 -20
  7. {ygg-0.1.21 → ygg-0.1.24}/LICENSE +0 -0
  8. {ygg-0.1.21 → ygg-0.1.24}/README.md +0 -0
  9. {ygg-0.1.21 → ygg-0.1.24}/setup.cfg +0 -0
  10. {ygg-0.1.21 → ygg-0.1.24}/src/ygg.egg-info/SOURCES.txt +0 -0
  11. {ygg-0.1.21 → ygg-0.1.24}/src/ygg.egg-info/dependency_links.txt +0 -0
  12. {ygg-0.1.21 → ygg-0.1.24}/src/ygg.egg-info/entry_points.txt +0 -0
  13. {ygg-0.1.21 → ygg-0.1.24}/src/ygg.egg-info/requires.txt +0 -0
  14. {ygg-0.1.21 → ygg-0.1.24}/src/ygg.egg-info/top_level.txt +0 -0
  15. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/__init__.py +0 -0
  16. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/__init__.py +0 -0
  17. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/compute/__init__.py +0 -0
  18. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/compute/remote.py +0 -0
  19. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/jobs/__init__.py +0 -0
  20. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/jobs/config.py +0 -0
  21. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/sql/__init__.py +0 -0
  22. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/sql/engine.py +0 -0
  23. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/sql/exceptions.py +0 -0
  24. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/sql/statement_result.py +0 -0
  25. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/sql/types.py +0 -0
  26. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/workspaces/__init__.py +0 -0
  27. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/workspaces/databricks_path.py +0 -0
  28. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/databricks/workspaces/workspace.py +0 -0
  29. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/dataclasses/__init__.py +0 -0
  30. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/dataclasses/dataclass.py +0 -0
  31. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/libs/__init__.py +0 -0
  32. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/libs/databrickslib.py +0 -0
  33. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/libs/extensions/__init__.py +0 -0
  34. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/libs/extensions/polars_extensions.py +0 -0
  35. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/libs/extensions/spark_extensions.py +0 -0
  36. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/libs/pandaslib.py +0 -0
  37. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/libs/polarslib.py +0 -0
  38. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/libs/sparklib.py +0 -0
  39. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/pyutils/__init__.py +0 -0
  40. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/pyutils/callable_serde.py +0 -0
  41. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/pyutils/exceptions.py +0 -0
  42. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/pyutils/modules.py +0 -0
  43. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/pyutils/parallel.py +0 -0
  44. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/pyutils/retry.py +0 -0
  45. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/requests/__init__.py +0 -0
  46. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/requests/msal.py +0 -0
  47. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/requests/session.py +0 -0
  48. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/__init__.py +0 -0
  49. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/__init__.py +0 -0
  50. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/arrow_cast.py +0 -0
  51. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/cast_options.py +0 -0
  52. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/pandas_cast.py +0 -0
  53. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/polars_cast.py +0 -0
  54. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/polars_pandas_cast.py +0 -0
  55. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/registry.py +0 -0
  56. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/spark_cast.py +0 -0
  57. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/spark_pandas_cast.py +0 -0
  58. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/cast/spark_polars_cast.py +0 -0
  59. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/libs.py +0 -0
  60. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/python_arrow.py +0 -0
  61. {ygg-0.1.21 → ygg-0.1.24}/src/yggdrasil/types/python_defaults.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.21
3
+ Version: 0.1.24
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  License: Apache License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ygg"
7
- version = "0.1.21"
7
+ version = "0.1.24"
8
8
  description = "Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks"
9
9
  readme = { file = "README.md", content-type = "text/markdown" }
10
10
  license = { file = "LICENSE" }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ygg
3
- Version: 0.1.21
3
+ Version: 0.1.24
4
4
  Summary: Type-friendly utilities for moving data between Python objects, Arrow, Polars, Pandas, Spark, and Databricks
5
5
  Author: Yggdrasil contributors
6
6
  License: Apache License
@@ -390,6 +390,8 @@ class Cluster(WorkspaceService):
390
390
  python_version: Optional[Union[str, tuple[int, ...]]] = None,
391
391
  **kwargs
392
392
  ):
393
+ pip_settings = PipIndexSettings.default_settings()
394
+
393
395
  if kwargs:
394
396
  details = ClusterDetails(**{
395
397
  **details.as_shallow_dict(),
@@ -421,6 +423,13 @@ class Cluster(WorkspaceService):
421
423
  if details.is_single_node is not None and details.kind is None:
422
424
  details.kind = Kind.CLASSIC_PREVIEW
423
425
 
426
+ if pip_settings.extra_index_urls:
427
+ if details.spark_env_vars is None:
428
+ details.spark_env_vars = {}
429
+ str_urls = " ".join(pip_settings.extra_index_urls)
430
+ details.spark_env_vars["UV_EXTRA_INDEX_URL"] = details.spark_env_vars.get("UV_INDEX", str_urls)
431
+ details.spark_env_vars["PIP_EXTRA_INDEX_URL"] = details.spark_env_vars.get("PIP_EXTRA_INDEX_URL", str_urls)
432
+
424
433
  return details
425
434
 
426
435
  def create_or_update(
@@ -120,7 +120,7 @@ class ExecutionContext:
120
120
  cmd = r"""import glob
121
121
  import json
122
122
  import os
123
- from yggdrasil.pyutils import PythonEnv
123
+ from yggdrasil.pyutils.python_env import PythonEnv
124
124
 
125
125
  current_env = PythonEnv.get_current()
126
126
  meta = {}
@@ -17,7 +17,7 @@ import zipfile
17
17
  from contextlib import contextmanager
18
18
  from dataclasses import dataclass
19
19
  from pathlib import Path, PurePosixPath
20
- from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union
20
+ from typing import Any, Iterable, Iterator, Mapping, MutableMapping, Optional, Union, List
21
21
 
22
22
  log = logging.getLogger(__name__)
23
23
 
@@ -40,6 +40,55 @@ _LOCKS_GUARD: threading.RLock = threading.RLock()
40
40
 
41
41
  # Installed into newly created envs (and on create() upsert when env already exists)
42
42
  DEFAULT_CREATE_PACKAGES: tuple[str, ...] = ("uv", "ygg")
43
+ _NON_PIPABLE_NAMES = (
44
+ "python-apt",
45
+ "python3-apt",
46
+ "unattended-upgrades",
47
+ "apt",
48
+ "apt-utils",
49
+ "dpkg",
50
+ "adduser",
51
+ "lsb-release",
52
+ "software-properties-common",
53
+ "systemd",
54
+ "udev",
55
+ "dbus",
56
+ )
57
+
58
+ # Matches a requirement line that starts with one of those names, followed by:
59
+ # - end of line
60
+ # - a version specifier (==,>=, etc)
61
+ # - extras ([...])
62
+ # - an environment marker (; ...)
63
+ _NON_PIPABLE_RE = re.compile(
64
+ r"^\s*(?:"
65
+ + "|".join(re.escape(n) for n in _NON_PIPABLE_NAMES)
66
+ + r")(?=\s*(?:$|==|~=|!=|<=|>=|<|>|\[|;))",
67
+ re.IGNORECASE,
68
+ )
69
+
70
+
71
+
72
+ def _filter_non_pipable_linux_packages(requirements: Iterable[str]) -> List[str]:
73
+ """
74
+ Remove Linux OS-level packages that aren't realistically installable via pip/uv on Databricks.
75
+ Keeps comments/blank lines as-is.
76
+ """
77
+ out: List[str] = []
78
+ for line in requirements:
79
+ s = line.strip()
80
+
81
+ # keep empty lines and comments
82
+ if not s or s.startswith("#"):
83
+ out.append(line)
84
+ continue
85
+
86
+ if _NON_PIPABLE_RE.match(s):
87
+ continue
88
+
89
+ out.append(line)
90
+
91
+ return out
43
92
 
44
93
 
45
94
  def _is_windows() -> bool:
@@ -464,7 +513,7 @@ class PythonEnv:
464
513
 
465
514
  env_obj = cls(root)
466
515
  if not env_obj.exists():
467
- raise PythonEnvError(f"Created env but python missing: {env_obj.python}")
516
+ raise PythonEnvError(f"Created env but python missing: {env_obj.python_executable}")
468
517
 
469
518
  env_obj.update(
470
519
  packages=install_pkgs,
@@ -560,17 +609,18 @@ class PythonEnv:
560
609
  )
561
610
 
562
611
  extra = list(pip_args or [])
563
- base = [uv, "pip", "install", "--python", str(env_obj.python)]
612
+ base_uv = [uv, "pip", "install", "--python", str(env_obj.python_executable)]
564
613
 
565
614
  if upgrade_pip:
566
615
  log.info("upgrading pip in env: %s", str(env_obj.root))
567
- _run_cmd(base + ["-U", "pip"] + extra, cwd=cwd, env=env, check=check)
616
+ _run_cmd(base_uv + ["-U", "pip"] + extra, cwd=cwd, env=env, check=check)
568
617
 
569
618
  if packages:
570
619
  pkgs = [packages] if isinstance(packages, str) else list(packages)
620
+
571
621
  if pkgs:
572
622
  log.info("installing packages into env %s: %s", str(env_obj.root), pkgs)
573
- _run_cmd(base + ["-U"] + pkgs + extra, cwd=cwd, env=env, check=check)
623
+ _run_cmd(base_uv + ["-U"] + pkgs + extra, cwd=cwd, env=env, check=check)
574
624
 
575
625
  if requirements:
576
626
  # requirements can be:
@@ -598,7 +648,7 @@ class PythonEnv:
598
648
  raise PythonEnvError("requirements must be a path-like string/Path or raw requirements text")
599
649
 
600
650
  log.info("installing requirements into env %s: %s", str(env_obj.root), str(req_path))
601
- _run_cmd(base + ["-U", "-r", str(req_path)] + extra, cwd=cwd, env=env, check=check)
651
+ _run_cmd(base_uv + ["-U", "-r", str(req_path)] + extra, cwd=cwd, env=env, check=check)
602
652
 
603
653
  finally:
604
654
  if tmp_ctx is not None:
@@ -634,12 +684,12 @@ class PythonEnv:
634
684
  return n if n else str(self.root)
635
685
 
636
686
  @property
637
- def python(self) -> Path:
687
+ def python_executable(self) -> Path:
638
688
  exe = "python.exe" if _is_windows() else "python"
639
689
  return self.bindir / exe
640
690
 
641
691
  def exists(self) -> bool:
642
- return self.python.exists()
692
+ return self.python_executable.exists()
643
693
 
644
694
  @property
645
695
  def version(self) -> str:
@@ -714,7 +764,7 @@ class PythonEnv:
714
764
 
715
765
  frozen_text: Optional[str] = None
716
766
  if keep_packages and self.exists():
717
- p = _run_cmd([uv, "pip", "freeze", "--python", str(self.python)], check=True)
767
+ p = _run_cmd([uv, "pip", "freeze", "--python", str(self.python_executable)], check=True)
718
768
  frozen_text = p.stdout or ""
719
769
 
720
770
  with _locked_env(root):
@@ -731,7 +781,7 @@ class PythonEnv:
731
781
 
732
782
  new_env = self.__class__(root)
733
783
  if not new_env.exists():
734
- raise PythonEnvError(f"Recreated env but python missing: {new_env.python}")
784
+ raise PythonEnvError(f"Recreated env but python missing: {new_env.python_executable}")
735
785
 
736
786
  if keep_packages and frozen_text and frozen_text.strip():
737
787
  import datetime as _dt
@@ -740,7 +790,7 @@ class PythonEnv:
740
790
  req_path = parent / f".{root.name}.freeze-{ts}.txt"
741
791
  req_path.write_text(frozen_text, encoding="utf-8")
742
792
  try:
743
- _run_cmd([uv, "pip", "install", "--python", str(new_env.python), "-r", str(req_path)], check=True)
793
+ _run_cmd([uv, "pip", "install", "--python", str(new_env.python_executable), "-r", str(req_path)], check=True)
744
794
  finally:
745
795
  try:
746
796
  req_path.unlink(missing_ok=True)
@@ -787,12 +837,12 @@ class PythonEnv:
787
837
  """
788
838
  if not python_versions:
789
839
  return self.export_requirements_matrix(
790
- python_versions=[self.python],
840
+ python_versions=[self.python_executable],
791
841
  out_dir=out_dir, base_name=base_name, include_frozen=include_frozen,
792
842
  include_input=include_input, check=check, buffers=buffers or {},
793
843
  uv_upgrade=uv_upgrade, uv_user=uv_user, uv_index_url=uv_index_url,
794
844
  uv_extra_pip_args=uv_extra_pip_args
795
- )[str(self.python)]
845
+ )[str(self.python_executable)]
796
846
 
797
847
  def _slug(s: str) -> str:
798
848
  s = (s or "").strip()
@@ -806,7 +856,7 @@ class PythonEnv:
806
856
  raise PythonEnvError("python_versions cannot be empty")
807
857
 
808
858
  if not self.exists():
809
- raise PythonEnvError(f"Python executable not found in env: {self.python}")
859
+ raise PythonEnvError(f"Python executable not found in env: {self.python_executable}")
810
860
 
811
861
  uv = self.__class__.ensure_uv(
812
862
  check=check,
@@ -878,7 +928,9 @@ print("RESULT:" + json.dumps(top_level))""".strip()
878
928
  if not isinstance(top_level, list) or not all(isinstance(x, str) for x in top_level):
879
929
  raise PythonEnvError(f"Unexpected top-level requirements payload: {top_level!r}")
880
930
 
881
- req_in_text = "\n".join(top_level) + "\n"
931
+ filtered = _filter_non_pipable_linux_packages(top_level)
932
+
933
+ req_in_text = "\n".join(filtered) + "\n"
882
934
  req_in_path.write_text(req_in_text, encoding="utf-8")
883
935
  if buffers is not None:
884
936
  buffers[f"{base_name}.in"] = req_in_text
@@ -887,7 +939,7 @@ print("RESULT:" + json.dumps(top_level))""".strip()
887
939
  frozen_path = out_root / f"{base_name}.frozen.txt"
888
940
  if include_frozen:
889
941
  log.info("exporting frozen requirements: %s", str(frozen_path))
890
- p = _run_cmd([uv, "pip", "freeze", "--python", str(self.python)], check=check)
942
+ p = _run_cmd([uv, "pip", "freeze", "--python", str(self.python_executable)], check=check)
891
943
  frozen_text = p.stdout or ""
892
944
  if write_files:
893
945
  frozen_path.write_text(frozen_text, encoding="utf-8")
@@ -944,8 +996,8 @@ print("RESULT:" + json.dumps(top_level))""".strip()
944
996
  # pick interpreter (default = env python)
945
997
  if python is None:
946
998
  if not self.exists():
947
- raise PythonEnvError(f"Python executable not found in env: {self.python}")
948
- py = self.python
999
+ raise PythonEnvError(f"Python executable not found in env: {self.python_executable}")
1000
+ py = self.python_executable
949
1001
  else:
950
1002
  py = Path(python).expanduser().resolve() if isinstance(python, Path) else Path(str(python)).expanduser().resolve()
951
1003
  if not py.exists():
@@ -1277,7 +1329,7 @@ print("RESULT:" + json.dumps(top_level))""".strip()
1277
1329
  require_python=require_python,
1278
1330
  dedupe=True,
1279
1331
  ):
1280
- py = str(e.python) if e.exists() else "-"
1332
+ py = str(e.python_executable) if e.exists() else "-"
1281
1333
  print(f"{e.name}\t{e.root}\t{py}")
1282
1334
  return 0
1283
1335
 
@@ -1323,7 +1375,7 @@ print("RESULT:" + json.dumps(top_level))""".strip()
1323
1375
  if args.cmd == "current":
1324
1376
  cur = cls.get_current()
1325
1377
  print(f"root={cur.root}")
1326
- print(f"python={cur.python}")
1378
+ print(f"python={cur.python_executable}")
1327
1379
  return 0
1328
1380
 
1329
1381
  raise PythonEnvError(f"Unknown command: {args.cmd}")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes