PyPI - xax - Versions diffs - 0.2.6__tar.gz → 0.2.8__tar.gz - Mend

xax 0.2.6.tar.gz → 0.2.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72)

{xax-0.2.6/xax.egg-info → xax-0.2.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.2.6
+Version: 0.2.8
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte

{xax-0.2.6 → xax-0.2.8}/pyproject.toml RENAMED Viewed

@@ -57,6 +57,7 @@ target-version = "py311"
 select = [
     "ANN",
+    "B",
     "D",
     "E",
     "F",

{xax-0.2.6 → xax-0.2.8}/xax/__init__.py RENAMED Viewed

@@ -12,7 +12,7 @@ and running the update script:
     python -m scripts.update_api --inplace
 """
-__version__ = "0.2.6"
+__version__ = "0.2.8"
 # This list shouldn't be modified by hand; instead, run the update script.
 __all__ = [
@@ -43,9 +43,12 @@ __all__ = [
     "cubic_bezier_interpolation",
     "euler_to_quat",
     "get_projected_gravity_vector_from_quat",
+    "normalize",
     "quat_to_euler",
     "quat_to_rotmat",
     "rotate_vector_by_quat",
+    "rotation6d_to_rotation_matrix",
+    "rotation_matrix_to_rotation6d",
     "cross_entropy",
     "cast_norm_type",
     "get_norm",
@@ -57,8 +60,18 @@ __all__ = [
     "BaseLauncher",
     "CliLauncher",
     "SingleProcessLauncher",
+    "LogDistribution",
+    "LogError",
+    "LogErrorSummary",
+    "LogGraph",
+    "LogHistogram",
     "LogImage",
     "LogLine",
+    "LogMesh",
+    "LogPing",
+    "LogScalar",
+    "LogStatus",
+    "LogVideo",
     "Logger",
     "LoggerImpl",
     "CallbackLogger",
@@ -66,6 +79,7 @@ __all__ = [
     "StateLogger",
     "StdoutLogger",
     "TensorboardLogger",
+    "load_ckpt",
     "CPUStatsOptions",
     "DataloaderConfig",
     "GPUStatsOptions",
@@ -115,6 +129,7 @@ __all__ = [
     "compute_nan_ratio",
     "flatten_array",
     "flatten_pytree",
+    "get_pytree_param_count",
     "pytree_has_nans",
     "reshuffle_pytree",
     "reshuffle_pytree_along_dims",
@@ -207,9 +222,12 @@ NAME_MAP: dict[str, str] = {
     "cubic_bezier_interpolation": "nn.geom",
     "euler_to_quat": "nn.geom",
     "get_projected_gravity_vector_from_quat": "nn.geom",
+    "normalize": "nn.geom",
     "quat_to_euler": "nn.geom",
     "quat_to_rotmat": "nn.geom",
     "rotate_vector_by_quat": "nn.geom",
+    "rotation6d_to_rotation_matrix": "nn.geom",
+    "rotation_matrix_to_rotation6d": "nn.geom",
     "cross_entropy": "nn.losses",
     "cast_norm_type": "nn.norm",
     "get_norm": "nn.norm",
@@ -221,8 +239,18 @@ NAME_MAP: dict[str, str] = {
     "BaseLauncher": "task.launchers.base",
     "CliLauncher": "task.launchers.cli",
     "SingleProcessLauncher": "task.launchers.single_process",
+    "LogDistribution": "task.logger",
+    "LogError": "task.logger",
+    "LogErrorSummary": "task.logger",
+    "LogGraph": "task.logger",
+    "LogHistogram": "task.logger",
     "LogImage": "task.logger",
     "LogLine": "task.logger",
+    "LogMesh": "task.logger",
+    "LogPing": "task.logger",
+    "LogScalar": "task.logger",
+    "LogStatus": "task.logger",
+    "LogVideo": "task.logger",
     "Logger": "task.logger",
     "LoggerImpl": "task.logger",
     "CallbackLogger": "task.loggers.callback",
@@ -230,6 +258,7 @@ NAME_MAP: dict[str, str] = {
     "StateLogger": "task.loggers.state",
     "StdoutLogger": "task.loggers.stdout",
     "TensorboardLogger": "task.loggers.tensorboard",
+    "load_ckpt": "task.mixins.checkpointing",
     "CPUStatsOptions": "task.mixins.cpu_stats",
     "DataloaderConfig": "task.mixins.data_loader",
     "GPUStatsOptions": "task.mixins.gpu_stats",
@@ -279,6 +308,7 @@ NAME_MAP: dict[str, str] = {
     "compute_nan_ratio": "utils.pytree",
     "flatten_array": "utils.pytree",
     "flatten_pytree": "utils.pytree",
+    "get_pytree_param_count": "utils.pytree",
     "pytree_has_nans": "utils.pytree",
     "reshuffle_pytree": "utils.pytree",
     "reshuffle_pytree_along_dims": "utils.pytree",
@@ -372,9 +402,12 @@ if IMPORT_ALL or TYPE_CHECKING:
         cubic_bezier_interpolation,
         euler_to_quat,
         get_projected_gravity_vector_from_quat,
+        normalize,
         quat_to_euler,
         quat_to_rotmat,
         rotate_vector_by_quat,
+        rotation6d_to_rotation_matrix,
+        rotation_matrix_to_rotation6d,
     )
     from xax.nn.losses import cross_entropy
     from xax.nn.norm import NormType, cast_norm_type, get_norm
@@ -384,12 +417,28 @@ if IMPORT_ALL or TYPE_CHECKING:
     from xax.task.launchers.base import BaseLauncher
     from xax.task.launchers.cli import CliLauncher
     from xax.task.launchers.single_process import SingleProcessLauncher
-    from xax.task.logger import Logger, LoggerImpl, LogImage, LogLine
+    from xax.task.logger import (
+        LogDistribution,
+        LogError,
+        LogErrorSummary,
+        Logger,
+        LoggerImpl,
+        LogGraph,
+        LogHistogram,
+        LogImage,
+        LogLine,
+        LogMesh,
+        LogPing,
+        LogScalar,
+        LogStatus,
+        LogVideo,
+    )
     from xax.task.loggers.callback import CallbackLogger
     from xax.task.loggers.json import JsonLogger
     from xax.task.loggers.state import StateLogger
     from xax.task.loggers.stdout import StdoutLogger
     from xax.task.loggers.tensorboard import TensorboardLogger
+    from xax.task.mixins.checkpointing import load_ckpt
     from xax.task.mixins.cpu_stats import CPUStatsOptions
     from xax.task.mixins.data_loader import DataloaderConfig
     from xax.task.mixins.gpu_stats import GPUStatsOptions
@@ -439,6 +488,7 @@ if IMPORT_ALL or TYPE_CHECKING:
         compute_nan_ratio,
         flatten_array,
         flatten_pytree,
+        get_pytree_param_count,
         pytree_has_nans,
         reshuffle_pytree,
         reshuffle_pytree_along_dims,

{xax-0.2.6 → xax-0.2.8}/xax/core/conf.py RENAMED Viewed

@@ -26,7 +26,7 @@ def field(value: FieldType, **kwargs: str) -> FieldType:
     metadata: dict[str, Any] = {}
     metadata.update(kwargs)
-    if hasattr(value, "__call__"):
+    if hasattr(value, "__call__"):  # noqa: B004
         return field_base(default_factory=value, metadata=metadata)
     if value.__class__.__hash__ is None:
         return field_base(default_factory=lambda: value, metadata=metadata)

{xax-0.2.6 → xax-0.2.8}/xax/nn/equinox.py RENAMED Viewed

@@ -68,8 +68,8 @@ def _infer_activation(activation: ActivationFunction) -> Callable:
         return lambda x: x
     try:
         return getattr(jax.nn, activation)
-    except AttributeError:
-        raise ValueError(f"Activation function `{activation}` not found in `jax.nn`")
+    except AttributeError as err:
+        raise ValueError(f"Activation function `{activation}` not found in `jax.nn`") from err
 def make_eqx_mlp(hyperparams: MLPHyperParams, *, key: PRNGKeyArray) -> eqx.nn.MLP:
@@ -100,7 +100,7 @@ def make_eqx_mlp(hyperparams: MLPHyperParams, *, key: PRNGKeyArray) -> eqx.nn.ML
 def export_eqx_mlp(
     model: eqx.nn.MLP,
     output_path: str | Path,
-    dtype: jax.numpy.dtype = eqx._misc.default_floating_dtype(),
+    dtype: jax.numpy.dtype | None = None,
 ) -> None:
     """Serialize an Equinox MLP to a .eqx file.
@@ -109,6 +109,9 @@ def export_eqx_mlp(
         output_path: The path to save the exported model.
         dtype: The dtype of the model.
     """
+    if dtype is None:
+        dtype = eqx._misc.default_floating_dtype()
     activation = model.activation.__name__
     final_activation = model.final_activation.__name__

{xax-0.2.6 → xax-0.2.8}/xax/nn/functions.py RENAMED Viewed

@@ -1,5 +1,5 @@
 # mypy: disable-error-code="override"
-"""Defines helper Torch functions."""
+"""Defines helper Jax functions."""
 import random
 from dataclasses import is_dataclass
@@ -58,13 +58,16 @@ def recursive_chunk(item: Any, num_chunks: int, dim: int = 0) -> Iterable[Any]:
         yield from np.array_split(item, num_chunks, axis=dim)
     elif is_dataclass(item):
         yield from (
-            item.__class__(**{k: i for k, i in zip(item.__dict__, ii)})
-            for ii in zip(*(recursive_chunk(v, num_chunks, dim) for v in item.__dict__.values()))
+            item.__class__(**{k: i for k, i in zip(item.__dict__, ii, strict=True)})
+            for ii in zip(*(recursive_chunk(v, num_chunks, dim) for v in item.__dict__.values()), strict=False)
         )
     elif isinstance(item, Mapping):
-        yield from (dict(zip(item, ii)) for ii in zip(*(recursive_chunk(i, num_chunks, dim) for i in item.values())))
+        yield from (
+            dict(zip(item, ii, strict=False))
+            for ii in zip(*(recursive_chunk(i, num_chunks, dim) for i in item.values()), strict=False)
+        )
     elif isinstance(item, Sequence):
-        yield from (list(ii) for ii in zip(*(recursive_chunk(i, num_chunks, dim) for i in item)))
+        yield from (list(ii) for ii in zip(*(recursive_chunk(i, num_chunks, dim) for i in item), strict=False))
     else:
         yield from (item for _ in range(num_chunks))

{xax-0.2.6 → xax-0.2.8}/xax/nn/geom.py RENAMED Viewed

@@ -1,5 +1,6 @@
 """Defines geometry functions."""
+import chex
 from jax import numpy as jnp
 from jaxtyping import Array
@@ -211,3 +212,51 @@ def quat_to_rotmat(quat: Array, eps: float = 1e-6) -> Array:
         ],
         axis=-2,
     )
+def normalize(v: jnp.ndarray, axis: int = -1, eps: float = 1e-8) -> jnp.ndarray:
+    norm = jnp.linalg.norm(v, axis=axis, keepdims=True)
+    return v / jnp.clip(norm, a_min=eps)
+def rotation6d_to_rotation_matrix(r6d: jnp.ndarray) -> jnp.ndarray:
+    """Convert 6D rotation representation to rotation matrix.
+    From https://arxiv.org/pdf/1812.07035, Appendix B
+    Args:
+        r6d: The 6D rotation representation, shape (*, 6).
+    Returns:
+        The rotation matrix, shape (*, 3, 3).
+    """
+    chex.assert_shape(r6d, (..., 6))
+    shape = r6d.shape
+    flat = r6d.reshape(-1, 6)
+    a_1 = flat[:, 0:3]
+    a_2 = flat[:, 3:6]
+    b_1 = normalize(a_1, axis=-1)
+    # Reordered Gram-Schmidt orthonormalization.
+    b_3 = normalize(jnp.cross(b_1, a_2), axis=-1)
+    b_2 = jnp.cross(b_3, b_1)
+    rotation_matrix = jnp.stack([b_1, b_2, b_3], axis=-1)
+    return rotation_matrix.reshape(shape[:-1] + (3, 3))
+def rotation_matrix_to_rotation6d(rotation_matrix: jnp.ndarray) -> jnp.ndarray:
+    """Convert rotation matrix to 6D rotation representation.
+    Args:
+        rotation_matrix: The rotation matrix, shape (*, 3, 3).
+    Returns:
+        The 6D rotation representation, shape (*, 6).
+    """
+    chex.assert_shape(rotation_matrix, (..., 3, 3))
+    shape = rotation_matrix.shape
+    # Simply concatenate a1 and a2 from SO(3)
+    r6d = jnp.concatenate([rotation_matrix[..., 0], rotation_matrix[..., 1]], axis=-1)
+    return r6d.reshape(shape[:-2] + (6,))

{xax-0.2.6 → xax-0.2.8}/xax/task/base.py RENAMED Viewed

@@ -184,8 +184,8 @@ class BaseTask(Generic[Config]):
             # Attempts to load any paths as configs.
             is_path = [Path(arg).is_file() or (task_path / arg).is_file() for arg in args]
-            paths = [arg for arg, is_path in zip(args, is_path) if is_path]
-            non_paths = [arg for arg, is_path in zip(args, is_path) if not is_path]
+            paths = [arg for arg, is_path in zip(args, is_path, strict=True) if is_path]
+            non_paths = [arg for arg, is_path in zip(args, is_path, strict=True) if not is_path]
             if paths:
                 cfg = OmegaConf.merge(cfg, *(get_config(path, task_path) for path in paths))
             cfg = OmegaConf.merge(cfg, OmegaConf.from_cli(non_paths))

{xax-0.2.6 → xax-0.2.8}/xax/task/logger.py RENAMED Viewed

@@ -462,11 +462,11 @@ class LoggerImpl(ABC):
         self.tickers = {phase: IntervalTicker(log_interval_seconds) for phase in get_args(Phase)}
-    def start(self) -> None:
-        pass
+    @abstractmethod
+    def start(self) -> None: ...
-    def stop(self) -> None:
-        pass
+    @abstractmethod
+    def stop(self) -> None: ...
     @abstractmethod
     def write(self, line: LogLine) -> None:
@@ -476,6 +476,7 @@ class LoggerImpl(ABC):
             line: The line to write.
         """
+    @abstractmethod
     def write_error_summary(self, error_summary: LogErrorSummary) -> None:
         """Handles writing an error summary.
@@ -483,6 +484,7 @@ class LoggerImpl(ABC):
             error_summary: The error summary to write.
         """
+    @abstractmethod
     def write_error(self, error: LogError) -> None:
         """Handles writing an error line.
@@ -490,6 +492,7 @@ class LoggerImpl(ABC):
             error: The error information to write.
         """
+    @abstractmethod
     def write_status(self, status: LogStatus) -> None:
         """Handles writing a status line.
@@ -497,6 +500,7 @@ class LoggerImpl(ABC):
             status: The status to write.
         """
+    @abstractmethod
     def write_ping(self, ping: LogPing) -> None:
         """Handles writing a ping line.
@@ -504,6 +508,7 @@ class LoggerImpl(ABC):
             ping: The ping to write.
         """
+    @abstractmethod
     def log_file(self, name: str, contents: str) -> None:
         """Logs a large text file.
@@ -621,7 +626,7 @@ class Logger:
             return
         line = self.pack(state)
         self.clear()
-        for lg in (lg for lg, should_log in zip(self.loggers, should_log) if should_log):
+        for lg in (lg for lg, should_log in zip(self.loggers, should_log, strict=False) if should_log):
             lg.write(line)
     def write_error_summary(self, error_summary: str) -> None:
@@ -1045,7 +1050,7 @@ class Logger:
                         line_spacing=line_spacing,
                         centered=centered,
                     )
-                    for img, label in zip(images, labels)
+                    for img, label in zip(images, labels, strict=True)
                 ]
                 tiled = tile_images([img.image for img in labeled], sep)

{xax-0.2.6 → xax-0.2.8}/xax/task/loggers/callback.py RENAMED Viewed

@@ -25,6 +25,12 @@ class CallbackLogger(LoggerImpl):
         self.ping_callback = ping_callback
         self.file_callback = file_callback
+    def start(self) -> None:
+        pass
+    def stop(self) -> None:
+        pass
     def write(self, line: LogLine) -> None:
         self.callback(line)

{xax-0.2.6 → xax-0.2.8}/xax/task/loggers/json.py RENAMED Viewed

@@ -2,13 +2,13 @@
 import json
 import sys
-from dataclasses import asdict
 from typing import Any, Literal, Mapping, TextIO
 from jaxtyping import Array
 from xax.task.logger import (
     LogError,
+    LogErrorSummary,
     LoggerImpl,
     LogLine,
     LogPing,
@@ -58,6 +58,12 @@ class JsonLogger(LoggerImpl):
         self.line_sep = line_sep
         self.remove_unicode_from_namespaces = remove_unicode_from_namespaces
+    def start(self) -> None:
+        pass
+    def stop(self) -> None:
+        pass
     @property
     def fp(self) -> TextIO:
         return self.log_stream
@@ -67,7 +73,7 @@ class JsonLogger(LoggerImpl):
         return self.err_log_stream
     def get_json(self, line: LogLine) -> str:
-        data: dict = {"state": asdict(line.state)}
+        data: dict = {"state": line.state.to_dict()}
         def add_logs(log: Mapping[str, Mapping[str, LogScalar | LogString]], data: dict) -> None:
             for namespace, values in log.items():
@@ -88,6 +94,12 @@ class JsonLogger(LoggerImpl):
         if self.flush_immediately:
             self.fp.flush()
+    def write_error_summary(self, error_summary: LogErrorSummary) -> None:
+        pass
+    def log_file(self, name: str, contents: str) -> None:
+        pass
     def write_error(self, error: LogError) -> None:
         self.err_fp.write(error.message)
         if error.location is not None:

{xax-0.2.6 → xax-0.2.8}/xax/task/loggers/state.py RENAMED Viewed

@@ -3,7 +3,14 @@
 from pathlib import Path
 from typing import Literal
-from xax.task.logger import LoggerImpl, LogLine
+from xax.task.logger import (
+    LogError,
+    LogErrorSummary,
+    LoggerImpl,
+    LogLine,
+    LogPing,
+    LogStatus,
+)
 class StateLogger(LoggerImpl):
@@ -30,3 +37,21 @@ class StateLogger(LoggerImpl):
     def write(self, line: LogLine) -> None:
         pass
+    def start(self) -> None:
+        pass
+    def stop(self) -> None:
+        pass
+    def write_error_summary(self, error_summary: LogErrorSummary) -> None:
+        pass
+    def write_error(self, error: LogError) -> None:
+        pass
+    def write_status(self, status: LogStatus) -> None:
+        pass
+    def write_ping(self, ping: LogPing) -> None:
+        pass

{xax-0.2.6 → xax-0.2.8}/xax/task/loggers/stdout.py RENAMED Viewed

@@ -79,11 +79,13 @@ class StdoutLogger(LoggerImpl):
         self.error_summary: tuple[str, datetime.datetime] | None = None
     def start(self) -> None:
-        return super().start()
+        pass
     def stop(self) -> None:
         self.write_queues()
-        return super().stop()
+    def log_file(self, name: str, contents: str) -> None:
+        pass
     def write_separator(self) -> None:
         self.write_fp.write("\033[2J\033[H")

{xax-0.2.6 → xax-0.2.8}/xax/task/loggers/tensorboard.py RENAMED Viewed

@@ -12,7 +12,7 @@ from typing import TypeVar
 from xax.core.state import Phase
 from xax.nn.parallel import is_master
-from xax.task.logger import LoggerImpl, LogLine
+from xax.task.logger import LogError, LogErrorSummary, LoggerImpl, LogLine, LogPing, LogStatus
 from xax.utils.jax import as_float
 from xax.utils.logging import LOG_STATUS, port_is_busy
 from xax.utils.tensorboard import TensorboardWriter, TensorboardWriters
@@ -236,3 +236,21 @@ class TensorboardLogger(LoggerImpl):
         for name, contents in self.files.items():
             writer.add_text(name, contents)
         self.files.clear()
+    def start(self) -> None:
+        pass
+    def stop(self) -> None:
+        pass
+    def write_error(self, error: LogError) -> None:
+        pass
+    def write_error_summary(self, error_summary: LogErrorSummary) -> None:
+        pass
+    def write_ping(self, ping: LogPing) -> None:
+        pass
+    def write_status(self, status: LogStatus) -> None:
+        pass

{xax-0.2.6 → xax-0.2.8}/xax/task/mixins/artifacts.py RENAMED Viewed

@@ -31,11 +31,13 @@ Config = TypeVar("Config", bound=ArtifactsConfig)
 class ArtifactsMixin(BaseTask[Config]):
     _exp_dir: Path | None
+    _stage_dir: Path | None
     def __init__(self, config: Config) -> None:
         super().__init__(config)
         self._exp_dir = None
+        self._stage_dir = None
     @functools.cached_property
     def run_dir(self) -> Path:
@@ -75,15 +77,16 @@ class ArtifactsMixin(BaseTask[Config]):
         logger.log(LOG_STATUS, self._exp_dir)
         return self._exp_dir
-    @functools.lru_cache(maxsize=None)
     def stage_environment(self) -> Path | None:
-        stage_dir = (self.exp_dir / "code").resolve()
-        try:
-            stage_environment(self, stage_dir)
-        except Exception:
-            logger.exception("Failed to stage environment!")
-            return None
-        return stage_dir
+        if self._stage_dir is None:
+            stage_dir = (self.exp_dir / "code").resolve()
+            try:
+                stage_environment(self, stage_dir)
+            except Exception:
+                logger.exception("Failed to stage environment!")
+                return None
+            self._stage_dir = stage_dir
+        return self._stage_dir
     def on_training_end(self, state: State) -> State:
         state = super().on_training_end(state)

{xax-0.2.6 → xax-0.2.8}/xax/task/mixins/checkpointing.py RENAMED Viewed

@@ -52,6 +52,114 @@ class CheckpointingConfig(ArtifactsConfig):
 Config = TypeVar("Config", bound=CheckpointingConfig)
+@overload
+def load_ckpt(
+    path: Path,
+    *,
+    part: Literal["all"],
+    model_template: PyTree,
+    optimizer_template: PyTree,
+    opt_state_template: PyTree,
+) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, DictConfig]: ...
+@overload
+def load_ckpt(
+    path: Path,
+    *,
+    part: Literal["model_state_config"],
+    model_template: PyTree,
+) -> tuple[PyTree, State, DictConfig]: ...
+@overload
+def load_ckpt(path: Path, *, part: Literal["model"], model_template: PyTree) -> PyTree: ...
+@overload
+def load_ckpt(path: Path, *, part: Literal["opt"], optimizer_template: PyTree) -> optax.GradientTransformation: ...
+@overload
+def load_ckpt(path: Path, *, part: Literal["opt_state"], opt_state_template: PyTree) -> optax.OptState: ...
+@overload
+def load_ckpt(path: Path, *, part: Literal["state"]) -> State: ...
+@overload
+def load_ckpt(path: Path, *, part: Literal["config"]) -> DictConfig: ...
+def load_ckpt(
+    path: str | Path,
+    *,
+    part: CheckpointPart = "model",
+    model_template: PyTree | None = None,
+    optimizer_template: PyTree | None = None,
+    opt_state_template: PyTree | None = None,
+) -> (
+    tuple[PyTree, optax.GradientTransformation, optax.OptState, State, DictConfig]
+    | tuple[PyTree, State, DictConfig]
+    | PyTree
+    | optax.GradientTransformation
+    | optax.OptState
+    | State
+    | DictConfig
+):
+    with tarfile.open(path, "r:gz") as tar:
+        def get_model() -> PyTree:
+            if model_template is None:
+                raise ValueError("model_template must be provided to load model weights")
+            if (model := tar.extractfile("model")) is None:
+                raise ValueError(f"Checkpoint does not contain a model file: {path}")
+            return eqx.tree_deserialise_leaves(io.BytesIO(model.read()), model_template)
+        def get_opt() -> optax.GradientTransformation:
+            if optimizer_template is None:
+                raise ValueError("optimizer_template must be provided to load optimizer")
+            if (opt := tar.extractfile("optimizer")) is None:
+                raise ValueError(f"Checkpoint does not contain an optimizer file: {path}")
+            return eqx.tree_deserialise_leaves(io.BytesIO(opt.read()), optimizer_template)
+        def get_opt_state() -> optax.OptState:
+            if opt_state_template is None:
+                raise ValueError("opt_state_template must be provided to load optimizer state")
+            if (opt_state := tar.extractfile("opt_state")) is None:
+                raise ValueError(f"Checkpoint does not contain an optimizer state file: {path}")
+            return eqx.tree_deserialise_leaves(io.BytesIO(opt_state.read()), opt_state_template)
+        def get_state() -> State:
+            if (state := tar.extractfile("state")) is None:
+                raise ValueError(f"Checkpoint does not contain a state file: {path}")
+            return State.from_dict(**json.loads(state.read().decode()))
+        def get_config() -> DictConfig:
+            if (config := tar.extractfile("config")) is None:
+                raise ValueError(f"Checkpoint does not contain a config file: {path}")
+            return cast(DictConfig, OmegaConf.load(config))
+        match part:
+            case "model":
+                return get_model()
+            case "opt":
+                return get_opt()
+            case "opt_state":
+                return get_opt_state()
+            case "state":
+                return get_state()
+            case "config":
+                return get_config()
+            case "model_state_config":
+                return get_model(), get_state(), get_config()
+            case "all":
+                return get_model(), get_opt(), get_opt_state(), get_state(), get_config()
+            case _:
+                raise ValueError(f"Invalid checkpoint part: {part}")
 class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     def __init__(self, config: Config) -> None:
         super().__init__(config)
@@ -82,149 +190,6 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
                 return True
         return False
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["all"],
-        model_template: PyTree,
-        optimizer_template: PyTree,
-        opt_state_template: PyTree,
-    ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]: ...
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["model_state_config"],
-        model_template: PyTree,
-    ) -> tuple[PyTree, State, Config]: ...
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["model"],
-        model_template: PyTree,
-    ) -> PyTree: ...
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["opt"],
-        optimizer_template: PyTree,
-    ) -> optax.GradientTransformation: ...
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["opt_state"],
-        opt_state_template: PyTree,
-    ) -> optax.OptState: ...
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["state"],
-    ) -> State: ...
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["config"],
-    ) -> Config: ...
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: CheckpointPart = "all",
-        model_template: PyTree | None = None,
-        optimizer_template: PyTree | None = None,
-        opt_state_template: PyTree | None = None,
-    ) -> (
-        tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]
-        | tuple[PyTree, State, Config]
-        | PyTree
-        | optax.GradientTransformation
-        | optax.OptState
-        | State
-        | Config
-    ):
-        """Load a checkpoint.
-        Args:
-            path: Path to the checkpoint directory
-            part: Which part of the checkpoint to load
-            model_template: Template model with correct structure but uninitialized weights
-            optimizer_template: Template optimizer with correct structure but uninitialized weights
-            opt_state_template: Template optimizer state with correct structure but uninitialized weights
-        Returns:
-            The requested checkpoint components
-        """
-        with tarfile.open(path, "r:gz") as tar:
-            def get_model() -> PyTree:
-                if model_template is None:
-                    raise ValueError("model_template must be provided to load model weights")
-                if (model := tar.extractfile("model")) is None:
-                    raise ValueError(f"Checkpoint does not contain a model file: {path}")
-                return eqx.tree_deserialise_leaves(io.BytesIO(model.read()), model_template)
-            def get_opt() -> optax.GradientTransformation:
-                if optimizer_template is None:
-                    raise ValueError("optimizer_template must be provided to load optimizer")
-                if (opt := tar.extractfile("optimizer")) is None:
-                    raise ValueError(f"Checkpoint does not contain an optimizer file: {path}")
-                return eqx.tree_deserialise_leaves(io.BytesIO(opt.read()), optimizer_template)
-            def get_opt_state() -> optax.OptState:
-                if opt_state_template is None:
-                    raise ValueError("opt_state_template must be provided to load optimizer state")
-                if (opt_state := tar.extractfile("opt_state")) is None:
-                    raise ValueError(f"Checkpoint does not contain an optimizer state file: {path}")
-                return eqx.tree_deserialise_leaves(io.BytesIO(opt_state.read()), opt_state_template)
-            def get_state() -> State:
-                if (state := tar.extractfile("state")) is None:
-                    raise ValueError(f"Checkpoint does not contain a state file: {path}")
-                return State.from_dict(**json.loads(state.read().decode()))
-            def get_config() -> Config:
-                if (config := tar.extractfile("config")) is None:
-                    raise ValueError(f"Checkpoint does not contain a config file: {path}")
-                return self.get_config(cast(DictConfig, OmegaConf.load(config)), use_cli=False)
-            match part:
-                case "model":
-                    return get_model()
-                case "opt":
-                    return get_opt()
-                case "opt_state":
-                    return get_opt_state()
-                case "state":
-                    return get_state()
-                case "config":
-                    return get_config()
-                case "model_state_config":
-                    return get_model(), get_state(), get_config()
-                case "all":
-                    return get_model(), get_opt(), get_opt_state(), get_state(), get_config()
-                case _:
-                    raise ValueError(f"Invalid checkpoint part: {part}")
     def save_checkpoint(
         self,
         model: PyTree | None = None,

{xax-0.2.6 → xax-0.2.8}/xax/task/mixins/train.py RENAMED Viewed

@@ -40,7 +40,7 @@ from xax.core.state import Phase, State
 from xax.nn.functions import set_random_seed
 from xax.nn.parallel import is_master
 from xax.task.mixins.artifacts import ArtifactsConfig, ArtifactsMixin
-from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin, CheckpointPart
+from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin, CheckpointPart, load_ckpt
 from xax.task.mixins.data_loader import DataloadersConfig, DataloadersMixin
 from xax.task.mixins.logger import LoggerConfig, LoggerMixin
 from xax.task.mixins.runnable import RunnableConfig, RunnableMixin
@@ -57,6 +57,7 @@ from xax.utils.experiments import (
 )
 from xax.utils.jax import jit as xax_jit
 from xax.utils.logging import LOG_PING, LOG_STATUS
+from xax.utils.pytree import get_pytree_param_count
 from xax.utils.text import highlight_exception_message, show_info
 from xax.utils.types.frozen_dict import FrozenDict
@@ -360,6 +361,7 @@ class TrainMixin(
         model = self.get_model(key)
         state = State.init_state()
+        self.log_model_size(model)
         if not load_optimizer:
             return model, state
@@ -450,44 +452,43 @@ class TrainMixin(
         match part:
             case "model_state_config":
                 model_spec = eqx.filter_eval_shape(self.get_model, key)
-                return self.load_ckpt_with_template(path, part="model_state_config", model_template=model_spec)
+                model, state, config = load_ckpt(path, part="model_state_config", model_template=model_spec)
+                config = self.get_config(config, use_cli=False)
+                return model, state, config
             case "model":
                 model_spec = eqx.filter_eval_shape(self.get_model, key)
-                return self.load_ckpt_with_template(path, part="model", model_template=model_spec)
-            case "config":
-                return self.load_ckpt_with_template(path, part="config")
+                return load_ckpt(path, part="model", model_template=model_spec)
             case "opt":
                 optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
-                return self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                return load_ckpt(path, part="opt", optimizer_template=optimizer_spec)
             case "opt_state":
                 if model is None:
                     model_spec = eqx.filter_eval_shape(self.get_model, key)
-                    model = self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+                    model = load_ckpt(path, part="model", model_template=model_spec)
                 if optimizer is None:
                     optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
-                    optimizer = self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                    optimizer = load_ckpt(path, part="opt", optimizer_template=optimizer_spec)
                 opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
-                return self.load_ckpt_with_template(path, part="opt_state", opt_state_template=opt_state_spec)
+                return load_ckpt(path, part="opt_state", opt_state_template=opt_state_spec)
             case "state":
-                return self.load_ckpt_with_template(path, part="state")
+                return load_ckpt(path, part="state")
             case "config":
-                return self.load_ckpt_with_template(path, part="config")
+                return self.get_config(load_ckpt(path, part="config"), use_cli=False)
             case "all":
                 model_spec = eqx.filter_eval_shape(self.get_model, key)
-                model = self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+                model = load_ckpt(path, part="model", model_template=model_spec)
                 optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
-                optimizer = self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                optimizer = load_ckpt(path, part="opt", optimizer_template=optimizer_spec)
                 opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
-                opt_state = self.load_ckpt_with_template(path, part="opt_state", opt_state_template=opt_state_spec)
-                state = self.load_ckpt_with_template(path, part="state")
-                config = self.load_ckpt_with_template(path, part="config")
+                opt_state = load_ckpt(path, part="opt_state", opt_state_template=opt_state_spec)
+                state = load_ckpt(path, part="state")
+                config = self.get_config(load_ckpt(path, part="config"), use_cli=False)
                 return model, optimizer, opt_state, state, config
             case _:
@@ -683,6 +684,9 @@ class TrainMixin(
         self.logger.log_file("config.yaml", self.config_str(self.config, use_cli=False))
         self.logger.log_file("info.json", get_info_json())
+    def log_model_size(self, model: PyTree) -> None:
+        logger.info("Model size: %s", f"{get_pytree_param_count(model):,}")
     def model_partition_fn(self, item: Any) -> bool:  # noqa: ANN401
         return eqx.is_inexact_array(item)

{xax-0.2.6 → xax-0.2.8}/xax/utils/experiments.py RENAMED Viewed

@@ -749,7 +749,8 @@ class BaseFileDownloader(ABC):
                         f"We detected some HTML elements in the downloaded file. "
                         f"This most likely means that the download triggered an unhandled API response by GDrive. "
                         f"Please report this to torchvision at https://github.com/pytorch/vision/issues including "
-                        f"the response:\n\n{text}"
+                        f"the response:\n\n{text}",
+                        stacklevel=2,
                     )
     @classmethod

{xax-0.2.6 → xax-0.2.8}/xax/utils/jaxpr.py RENAMED Viewed

@@ -3,10 +3,10 @@
 from pathlib import Path
 import jax
-import jax.core
+import jax.extend.core
-def save_jaxpr_dot(closed_jaxpr: jax.core.ClosedJaxpr, filename: str | Path) -> None:
+def save_jaxpr_dot(closed_jaxpr: jax.extend.core.ClosedJaxpr, filename: str | Path) -> None:
     """Save the JAXPR to a DOT file.
     Example usage:
@@ -30,15 +30,15 @@ def save_jaxpr_dot(closed_jaxpr: jax.core.ClosedJaxpr, filename: str | Path) ->
     with open(filename, "w") as f:
         f.write("digraph Jaxpr {\n")
-        var_names: dict[jax.core.Var, str] = {}
+        var_names: dict[jax.extend.core.Var, str] = {}
         var_count = 0
-        def get_var_name(var: jax.core.Var) -> str:
+        def get_var_name(var: jax.extend.core.Var) -> str:
             """Get a unique name for a variable."""
             nonlocal var_names, var_count
             # Handle Literal objects specially since they're not hashable
-            if isinstance(var, jax.core.Literal):
+            if isinstance(var, jax.extend.core.Literal):
                 # Create a name based on the literal value
                 name = f"lit_{var.val}"
                 return name

{xax-0.2.6 → xax-0.2.8}/xax/utils/pytree.py RENAMED Viewed

@@ -1,6 +1,7 @@
 """Utils for accessing, modifying, and otherwise manipulating pytrees."""
 import chex
+import equinox as eqx
 import jax
 import jax.numpy as jnp
 from jax import Array
@@ -57,7 +58,7 @@ def pytree_has_nans(pytree: PyTree) -> Array:
 def update_pytree(cond: Array, new: PyTree, original: PyTree) -> PyTree:
     """Update a pytree based on a condition."""
-    # Tricky, need use tree_map because where expects array leafs.
+    # Tricky, need use tree.map because where expects array leafs.
     return jax.tree.map(lambda x, y: jnp.where(cond, x, y), new, original)
@@ -124,7 +125,7 @@ def reshuffle_pytree(data: PyTree, batch_shape: tuple[int, ...], rng: PRNGKeyArr
 def reshuffle_pytree_independently(data: PyTree, batch_shape: tuple[int, ...], rng: PRNGKeyArray) -> PyTree:
     """Reshuffle a rollout array across arbitrary batch dimensions independently of each other."""
     rngs = jax.random.split(rng, len(batch_shape))
-    perms = [jax.random.permutation(rng_i, dim) for rng_i, dim in zip(rngs, batch_shape)]
+    perms = [jax.random.permutation(rng_i, dim) for rng_i, dim in zip(rngs, batch_shape, strict=True)]
     # n-dimensional index grid from permutations
     idx_grids = jnp.meshgrid(*perms, indexing="ij")
@@ -236,3 +237,9 @@ def reshuffle_pytree_along_dims(
         return x
     return jax.tree.map_with_path(restore_transpose, reshuffled_transposed)
+def get_pytree_param_count(pytree: PyTree) -> int:
+    """Calculates the total number of parameters in a PyTree."""
+    leaves, _ = jax.tree.flatten(pytree)
+    return sum(x.size for x in leaves if isinstance(x, jnp.ndarray) and eqx.is_inexact_array(x))

{xax-0.2.6 → xax-0.2.8}/xax/utils/text.py RENAMED Viewed

@@ -192,7 +192,7 @@ def render_text_blocks(
         if any(len(row) != len(blocks[0]) for row in blocks):
             raise ValueError("All rows must have the same number of blocks in order to align them")
         widths = [[max(len(line) for line in i.lines) if i.width is None else i.width for i in r] for r in blocks]
-        row_widths = [max(i) for i in zip(*widths)]
+        row_widths = [max(i) for i in zip(*widths, strict=True)]
         for row in blocks:
             for i, block in enumerate(row):
                 block.width = row_widths[i]
@@ -263,7 +263,7 @@ def render_text_blocks(
                             if i >= len(block.lines)
                             else colored(pad(block.lines[i], width, block.center), block.color, bold=block.bold)
                         )
-                        for block, width in zip(row, get_widths(row))
+                        for block, width in zip(row, get_widths(row), strict=True)
                     ]
                 )
                 + " │"

{xax-0.2.6 → xax-0.2.8}/xax/utils/types/frozen_dict.py RENAMED Viewed

@@ -133,12 +133,12 @@ class FrozenDict(Mapping[K, V]):
     @classmethod
     def tree_unflatten(cls, keys: tuple[K, ...], values: tuple[Any, ...]) -> "FrozenDict[K, V]":
-        return cls({k: v for k, v in zip(keys, values)}, __unsafe_skip_copy__=True)
+        return cls({k: v for k, v in zip(keys, values, strict=True)}, __unsafe_skip_copy__=True)
 def unfreeze(x: FrozenDict[K, V] | dict[str, Any]) -> dict[Any, Any]:  # noqa: ANN401
     if isinstance(x, FrozenDict):
-        return jax.tree_util.tree_map(lambda y: y, x._dict)
+        return jax.tree.map(lambda y: y, x._dict)
     elif isinstance(x, dict):
         ys = {}
         for key, value in x.items():

{xax-0.2.6 → xax-0.2.8/xax.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.2.6
+Version: 0.2.8
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte