xax 0.2.5__tar.gz → 0.2.7__tar.gz
This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- {xax-0.2.5/xax.egg-info → xax-0.2.7}/PKG-INFO +1 -1
- {xax-0.2.5 → xax-0.2.7}/xax/__init__.py +7 -2
- {xax-0.2.5 → xax-0.2.7}/xax/nn/functions.py +1 -1
- {xax-0.2.5 → xax-0.2.7}/xax/task/logger.py +2 -1
- {xax-0.2.5 → xax-0.2.7}/xax/task/loggers/json.py +1 -2
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/checkpointing.py +108 -143
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/train.py +34 -22
- {xax-0.2.5 → xax-0.2.7}/xax/utils/jaxpr.py +5 -5
- {xax-0.2.5 → xax-0.2.7}/xax/utils/pytree.py +1 -1
- {xax-0.2.5 → xax-0.2.7}/xax/utils/types/frozen_dict.py +1 -1
- {xax-0.2.5 → xax-0.2.7/xax.egg-info}/PKG-INFO +1 -1
- {xax-0.2.5 → xax-0.2.7}/LICENSE +0 -0
- {xax-0.2.5 → xax-0.2.7}/MANIFEST.in +0 -0
- {xax-0.2.5 → xax-0.2.7}/README.md +0 -0
- {xax-0.2.5 → xax-0.2.7}/pyproject.toml +0 -0
- {xax-0.2.5 → xax-0.2.7}/setup.cfg +0 -0
- {xax-0.2.5 → xax-0.2.7}/setup.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/core/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/core/conf.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/core/state.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/embeddings.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/equinox.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/export.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/geom.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/losses.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/norm.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/parallel.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/nn/ssm.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/py.typed +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/requirements-dev.txt +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/requirements.txt +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/base.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/launchers/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/launchers/base.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/launchers/cli.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/launchers/single_process.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/loggers/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/loggers/callback.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/loggers/state.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/loggers/stdout.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/loggers/tensorboard.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/artifacts.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/compile.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/cpu_stats.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/data_loader.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/gpu_stats.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/logger.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/process.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/runnable.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/mixins/step_wrapper.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/script.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/task/task.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/data/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/data/collate.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/debugging.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/experiments.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/jax.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/logging.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/numpy.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/profile.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/tensorboard.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/text.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/types/__init__.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax/utils/types/hashable_array.py +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax.egg-info/SOURCES.txt +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax.egg-info/dependency_links.txt +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax.egg-info/requires.txt +0 -0
- {xax-0.2.5 → xax-0.2.7}/xax.egg-info/top_level.txt +0 -0
{xax-0.2.5 → xax-0.2.7}/xax/__init__.py

```diff
@@ -12,7 +12,7 @@ and running the update script:
     python -m scripts.update_api --inplace
 """
 
-__version__ = "0.2.5"
+__version__ = "0.2.7"
 
 # This list shouldn't be modified by hand; instead, run the update script.
 __all__ = [
```
```diff
@@ -66,11 +66,13 @@ __all__ = [
     "StateLogger",
     "StdoutLogger",
     "TensorboardLogger",
+    "load_ckpt",
     "CPUStatsOptions",
     "DataloaderConfig",
     "GPUStatsOptions",
     "StepContext",
     "ValidStepTimer",
+    "get_param_count",
     "Script",
     "ScriptConfig",
     "Config",
```
```diff
@@ -230,11 +232,13 @@ NAME_MAP: dict[str, str] = {
     "StateLogger": "task.loggers.state",
     "StdoutLogger": "task.loggers.stdout",
     "TensorboardLogger": "task.loggers.tensorboard",
+    "load_ckpt": "task.mixins.checkpointing",
     "CPUStatsOptions": "task.mixins.cpu_stats",
     "DataloaderConfig": "task.mixins.data_loader",
     "GPUStatsOptions": "task.mixins.gpu_stats",
     "StepContext": "task.mixins.step_wrapper",
     "ValidStepTimer": "task.mixins.train",
+    "get_param_count": "task.mixins.train",
     "Script": "task.script",
     "ScriptConfig": "task.script",
     "Config": "task.task",
```
```diff
@@ -390,11 +394,12 @@ if IMPORT_ALL or TYPE_CHECKING:
     from xax.task.loggers.state import StateLogger
     from xax.task.loggers.stdout import StdoutLogger
     from xax.task.loggers.tensorboard import TensorboardLogger
+    from xax.task.mixins.checkpointing import load_ckpt
     from xax.task.mixins.cpu_stats import CPUStatsOptions
     from xax.task.mixins.data_loader import DataloaderConfig
     from xax.task.mixins.gpu_stats import GPUStatsOptions
     from xax.task.mixins.step_wrapper import StepContext
-    from xax.task.mixins.train import Batch, Output, ValidStepTimer
+    from xax.task.mixins.train import Batch, Output, ValidStepTimer, get_param_count
     from xax.task.script import Script, ScriptConfig
     from xax.task.task import Config, Task
     from xax.utils.data.collate import CollateMode, collate, collate_non_null
```
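Together, these `__init__.py` hunks register the two new symbols for lazy loading: `__all__` advertises them, `NAME_MAP` points each name at its home module, and the `TYPE_CHECKING` block keeps static analyzers happy. A minimal sketch of the effect (assuming the package resolves root attributes lazily through `NAME_MAP`, as the hunks imply):

```python
import xax

# Each attribute access imports the owning submodule on demand,
# per the NAME_MAP entries added above.
print(xax.load_ckpt)        # from xax.task.mixins.checkpointing
print(xax.get_param_count)  # from xax.task.mixins.train
```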
{xax-0.2.5 → xax-0.2.7}/xax/task/logger.py

```diff
@@ -521,7 +521,8 @@ class LoggerImpl(ABC):
         Returns:
             If the logger should log the current step.
         """
-        return self.tickers[state.phase].tick(state.elapsed_time_s.item())
+        elapsed_time = state.elapsed_time_s.item() if state.phase == "train" else state.valid_elapsed_time_s.item()
+        return self.tickers[state.phase].tick(elapsed_time)
 
 
 class ToastHandler(logging.Handler):
```
{xax-0.2.5 → xax-0.2.7}/xax/task/loggers/json.py

```diff
@@ -2,7 +2,6 @@
 
 import json
 import sys
-from dataclasses import asdict
 from typing import Any, Literal, Mapping, TextIO
 
 from jaxtyping import Array
```
```diff
@@ -67,7 +66,7 @@ class JsonLogger(LoggerImpl):
         return self.err_log_stream
 
     def get_json(self, line: LogLine) -> str:
-        data: dict = {"state": asdict(line.state)}
+        data: dict = {"state": line.state.to_dict()}
 
         def add_logs(log: Mapping[str, Mapping[str, LogScalar | LogString]], data: dict) -> None:
             for namespace, values in log.items():
```
{xax-0.2.5 → xax-0.2.7}/xax/task/mixins/checkpointing.py

```diff
@@ -52,6 +52,114 @@ class CheckpointingConfig(ArtifactsConfig):
 Config = TypeVar("Config", bound=CheckpointingConfig)
 
 
+@overload
+def load_ckpt(
+    path: Path,
+    *,
+    part: Literal["all"],
+    model_template: PyTree,
+    optimizer_template: PyTree,
+    opt_state_template: PyTree,
+) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, DictConfig]: ...
+
+
+@overload
+def load_ckpt(
+    path: Path,
+    *,
+    part: Literal["model_state_config"],
+    model_template: PyTree,
+) -> tuple[PyTree, State, DictConfig]: ...
+
+
+@overload
+def load_ckpt(path: Path, *, part: Literal["model"], model_template: PyTree) -> PyTree: ...
+
+
+@overload
+def load_ckpt(path: Path, *, part: Literal["opt"], optimizer_template: PyTree) -> optax.GradientTransformation: ...
+
+
+@overload
+def load_ckpt(path: Path, *, part: Literal["opt_state"], opt_state_template: PyTree) -> optax.OptState: ...
+
+
+@overload
+def load_ckpt(path: Path, *, part: Literal["state"]) -> State: ...
+
+
+@overload
+def load_ckpt(path: Path, *, part: Literal["config"]) -> DictConfig: ...
+
+
+def load_ckpt(
+    path: str | Path,
+    *,
+    part: CheckpointPart = "model",
+    model_template: PyTree | None = None,
+    optimizer_template: PyTree | None = None,
+    opt_state_template: PyTree | None = None,
+) -> (
+    tuple[PyTree, optax.GradientTransformation, optax.OptState, State, DictConfig]
+    | tuple[PyTree, State, DictConfig]
+    | PyTree
+    | optax.GradientTransformation
+    | optax.OptState
+    | State
+    | DictConfig
+):
+    with tarfile.open(path, "r:gz") as tar:
+
+        def get_model() -> PyTree:
+            if model_template is None:
+                raise ValueError("model_template must be provided to load model weights")
+            if (model := tar.extractfile("model")) is None:
+                raise ValueError(f"Checkpoint does not contain a model file: {path}")
+            return eqx.tree_deserialise_leaves(io.BytesIO(model.read()), model_template)
+
+        def get_opt() -> optax.GradientTransformation:
+            if optimizer_template is None:
+                raise ValueError("optimizer_template must be provided to load optimizer")
+            if (opt := tar.extractfile("optimizer")) is None:
+                raise ValueError(f"Checkpoint does not contain an optimizer file: {path}")
+            return eqx.tree_deserialise_leaves(io.BytesIO(opt.read()), optimizer_template)
+
+        def get_opt_state() -> optax.OptState:
+            if opt_state_template is None:
+                raise ValueError("opt_state_template must be provided to load optimizer state")
+            if (opt_state := tar.extractfile("opt_state")) is None:
+                raise ValueError(f"Checkpoint does not contain an optimizer state file: {path}")
+            return eqx.tree_deserialise_leaves(io.BytesIO(opt_state.read()), opt_state_template)
+
+        def get_state() -> State:
+            if (state := tar.extractfile("state")) is None:
+                raise ValueError(f"Checkpoint does not contain a state file: {path}")
+            return State.from_dict(**json.loads(state.read().decode()))
+
+        def get_config() -> DictConfig:
+            if (config := tar.extractfile("config")) is None:
+                raise ValueError(f"Checkpoint does not contain a config file: {path}")
+            return cast(DictConfig, OmegaConf.load(config))
+
+        match part:
+            case "model":
+                return get_model()
+            case "opt":
+                return get_opt()
+            case "opt_state":
+                return get_opt_state()
+            case "state":
+                return get_state()
+            case "config":
+                return get_config()
+            case "model_state_config":
+                return get_model(), get_state(), get_config()
+            case "all":
+                return get_model(), get_opt(), get_opt_state(), get_state(), get_config()
+            case _:
+                raise ValueError(f"Invalid checkpoint part: {part}")
+
+
 class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     def __init__(self, config: Config) -> None:
         super().__init__(config)
```
```diff
@@ -82,149 +190,6 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
             return True
         return False
 
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["all"],
-        model_template: PyTree,
-        optimizer_template: PyTree,
-        opt_state_template: PyTree,
-    ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]: ...
-
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["model_state_config"],
-        model_template: PyTree,
-    ) -> tuple[PyTree, State, Config]: ...
-
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["model"],
-        model_template: PyTree,
-    ) -> PyTree: ...
-
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["opt"],
-        optimizer_template: PyTree,
-    ) -> optax.GradientTransformation: ...
-
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["opt_state"],
-        opt_state_template: PyTree,
-    ) -> optax.OptState: ...
-
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["state"],
-    ) -> State: ...
-
-    @overload
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: Literal["config"],
-    ) -> Config: ...
-
-    def load_ckpt_with_template(
-        self,
-        path: Path,
-        *,
-        part: CheckpointPart = "all",
-        model_template: PyTree | None = None,
-        optimizer_template: PyTree | None = None,
-        opt_state_template: PyTree | None = None,
-    ) -> (
-        tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]
-        | tuple[PyTree, State, Config]
-        | PyTree
-        | optax.GradientTransformation
-        | optax.OptState
-        | State
-        | Config
-    ):
-        """Load a checkpoint.
-
-        Args:
-            path: Path to the checkpoint directory
-            part: Which part of the checkpoint to load
-            model_template: Template model with correct structure but uninitialized weights
-            optimizer_template: Template optimizer with correct structure but uninitialized weights
-            opt_state_template: Template optimizer state with correct structure but uninitialized weights
-
-        Returns:
-            The requested checkpoint components
-        """
-        with tarfile.open(path, "r:gz") as tar:
-
-            def get_model() -> PyTree:
-                if model_template is None:
-                    raise ValueError("model_template must be provided to load model weights")
-                if (model := tar.extractfile("model")) is None:
-                    raise ValueError(f"Checkpoint does not contain a model file: {path}")
-                return eqx.tree_deserialise_leaves(io.BytesIO(model.read()), model_template)
-
-            def get_opt() -> optax.GradientTransformation:
-                if optimizer_template is None:
-                    raise ValueError("optimizer_template must be provided to load optimizer")
-                if (opt := tar.extractfile("optimizer")) is None:
-                    raise ValueError(f"Checkpoint does not contain an optimizer file: {path}")
-                return eqx.tree_deserialise_leaves(io.BytesIO(opt.read()), optimizer_template)
-
-            def get_opt_state() -> optax.OptState:
-                if opt_state_template is None:
-                    raise ValueError("opt_state_template must be provided to load optimizer state")
-                if (opt_state := tar.extractfile("opt_state")) is None:
-                    raise ValueError(f"Checkpoint does not contain an optimizer state file: {path}")
-                return eqx.tree_deserialise_leaves(io.BytesIO(opt_state.read()), opt_state_template)
-
-            def get_state() -> State:
-                if (state := tar.extractfile("state")) is None:
-                    raise ValueError(f"Checkpoint does not contain a state file: {path}")
-                return State.from_dict(**json.loads(state.read().decode()))
-
-            def get_config() -> Config:
-                if (config := tar.extractfile("config")) is None:
-                    raise ValueError(f"Checkpoint does not contain a config file: {path}")
-                return self.get_config(cast(DictConfig, OmegaConf.load(config)), use_cli=False)
-
-            match part:
-                case "model":
-                    return get_model()
-                case "opt":
-                    return get_opt()
-                case "opt_state":
-                    return get_opt_state()
-                case "state":
-                    return get_state()
-                case "config":
-                    return get_config()
-                case "model_state_config":
-                    return get_model(), get_state(), get_config()
-                case "all":
-                    return get_model(), get_opt(), get_opt_state(), get_state(), get_config()
-                case _:
-                    raise ValueError(f"Invalid checkpoint part: {part}")
-
     def save_checkpoint(
         self,
         model: PyTree | None = None,
```
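The removed `load_ckpt_with_template` method is superseded by the module-level `load_ckpt` added above, which no longer needs a task instance and returns the raw `DictConfig` instead of a task-processed config. A minimal usage sketch (the checkpoint path is hypothetical; `part` selects which archive member to deserialise):

```python
from pathlib import Path

from xax.task.mixins.checkpointing import load_ckpt

ckpt = Path("run_dir/checkpoints/ckpt.bin")  # hypothetical checkpoint archive
state = load_ckpt(ckpt, part="state")    # training State only
config = load_ckpt(ckpt, part="config")  # raw DictConfig, no CLI merging
```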
{xax-0.2.5 → xax-0.2.7}/xax/task/mixins/train.py

```diff
@@ -40,7 +40,7 @@ from xax.core.state import Phase, State
 from xax.nn.functions import set_random_seed
 from xax.nn.parallel import is_master
 from xax.task.mixins.artifacts import ArtifactsConfig, ArtifactsMixin
-from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin, CheckpointPart
+from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin, CheckpointPart, load_ckpt
 from xax.task.mixins.data_loader import DataloadersConfig, DataloadersMixin
 from xax.task.mixins.logger import LoggerConfig, LoggerMixin
 from xax.task.mixins.runnable import RunnableConfig, RunnableMixin
```
```diff
@@ -96,6 +96,12 @@ def batches_per_step_schedule(schedule: list[int] | None) -> list[int] | None:
     return list(itertools.accumulate([0] + schedule))
 
 
+def get_param_count(pytree: PyTree) -> int:
+    """Calculates the total number of parameters in a PyTree."""
+    leaves, _ = jax.tree.flatten(pytree)
+    return sum(x.size for x in leaves if isinstance(x, jnp.ndarray))
+
+
 class ValidStepTimer:
     def __init__(
         self,
```
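`get_param_count` simply sums `size` over the array leaves, so it works on any Equinox module or plain pytree. A quick sanity check (the layer sizes here are arbitrary):

```python
import equinox as eqx
import jax

from xax.task.mixins.train import get_param_count

linear = eqx.nn.Linear(in_features=4, out_features=3, key=jax.random.PRNGKey(0))
print(get_param_count(linear))  # 4 * 3 weights + 3 biases = 15
```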
```diff
@@ -115,19 +121,22 @@
         self.last_valid_time: float | None = None
         self.last_valid_step: int | None = None
 
+    def _reset(self, state: State) -> None:
+        self.last_valid_time = state.elapsed_time_s.item()
+        self.last_valid_step = state.num_steps.item()
+
     def is_valid_step(self, state: State) -> bool:
         if state.num_steps < self.valid_first_n_steps:
             return True
 
         if self.last_valid_time is None or self.last_valid_step is None:
-            self.last_valid_time = state.elapsed_time_s.item()
-            self.last_valid_step = state.num_steps.item()
+            self._reset(state)
             return False
 
         # Step-based validation.
         valid_every_n_steps = self.valid_every_n_steps
         if valid_every_n_steps is not None and state.num_steps >= valid_every_n_steps + self.last_valid_step:
-            self.last_valid_step = state.num_steps.item()
+            self._reset(state)
             return True
 
         # Time-based validation.
```
```diff
@@ -136,14 +145,14 @@
             valid_every_n_seconds is not None
             and state.elapsed_time_s.item() - self.last_valid_time >= valid_every_n_seconds
         ):
-            self.last_valid_time = state.elapsed_time_s.item()
+            self._reset(state)
             return True
 
         # Time-based validation for first validation step.
         if self.first_valid_step_flag:
             valid_first_n_seconds = self.valid_first_n_seconds
             if valid_first_n_seconds is not None and state.elapsed_time_s.item() >= valid_first_n_seconds:
-                self.last_valid_time = state.elapsed_time_s.item()
+                self._reset(state)
                 self.first_valid_step_flag = False
                 return True
 
```
```diff
@@ -357,6 +366,7 @@ class TrainMixin(
         model = self.get_model(key)
         state = State.init_state()
 
+        self.log_model_size(model)
         if not load_optimizer:
             return model, state
 
```
```diff
@@ -447,44 +457,43 @@
         match part:
             case "model_state_config":
                 model_spec = eqx.filter_eval_shape(self.get_model, key)
-                return self.load_ckpt_with_template(path, part="model_state_config", model_template=model_spec)
+                model, state, config = load_ckpt(path, part="model_state_config", model_template=model_spec)
+                config = self.get_config(config, use_cli=False)
+                return model, state, config
 
             case "model":
                 model_spec = eqx.filter_eval_shape(self.get_model, key)
-                return self.load_ckpt_with_template(path, part="model", model_template=model_spec)
-
-            case "config":
-                return self.load_ckpt_with_template(path, part="config")
+                return load_ckpt(path, part="model", model_template=model_spec)
 
             case "opt":
                 optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
-                return self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                return load_ckpt(path, part="opt", optimizer_template=optimizer_spec)
 
             case "opt_state":
                 if model is None:
                     model_spec = eqx.filter_eval_shape(self.get_model, key)
-                    model = self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+                    model = load_ckpt(path, part="model", model_template=model_spec)
                 if optimizer is None:
                     optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
-                    optimizer = self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                    optimizer = load_ckpt(path, part="opt", optimizer_template=optimizer_spec)
                 opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
-                return self.load_ckpt_with_template(path, part="opt_state", opt_state_template=opt_state_spec)
+                return load_ckpt(path, part="opt_state", opt_state_template=opt_state_spec)
 
             case "state":
-                return self.load_ckpt_with_template(path, part="state")
+                return load_ckpt(path, part="state")
 
             case "config":
-                return self.load_ckpt_with_template(path, part="config")
+                return self.get_config(load_ckpt(path, part="config"), use_cli=False)
 
             case "all":
                 model_spec = eqx.filter_eval_shape(self.get_model, key)
-                model = self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+                model = load_ckpt(path, part="model", model_template=model_spec)
                 optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
-                optimizer = self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                optimizer = load_ckpt(path, part="opt", optimizer_template=optimizer_spec)
                 opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
-                opt_state = self.load_ckpt_with_template(path, part="opt_state", opt_state_template=opt_state_spec)
-                state = self.load_ckpt_with_template(path, part="state")
-                config = self.load_ckpt_with_template(path, part="config")
+                opt_state = load_ckpt(path, part="opt_state", opt_state_template=opt_state_spec)
+                state = load_ckpt(path, part="state")
+                config = self.get_config(load_ckpt(path, part="config"), use_cli=False)
                 return model, optimizer, opt_state, state, config
 
             case _:
```
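Each branch builds its template with `eqx.filter_eval_shape`, which traces the constructor and returns a pytree of abstract shape/dtype structs without allocating any parameters; `load_ckpt` then deserialises real arrays into that structure. A small sketch of the template trick in isolation (the MLP is a stand-in model, not part of xax):

```python
import equinox as eqx
import jax


def make_model(key: jax.Array) -> eqx.nn.MLP:
    return eqx.nn.MLP("scalar", "scalar", width_size=8, depth=2, key=key)


# No parameters are materialised here: the leaves are abstract shape/dtype
# structs, which is all the deserialisation target needs to provide.
model_spec = eqx.filter_eval_shape(make_model, jax.random.PRNGKey(0))
```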
```diff
@@ -680,6 +689,9 @@ class TrainMixin(
         self.logger.log_file("config.yaml", self.config_str(self.config, use_cli=False))
         self.logger.log_file("info.json", get_info_json())
 
+    def log_model_size(self, model: PyTree) -> None:
+        logger.info("Model size: %s", f"{get_param_count(model):,}")
+
     def model_partition_fn(self, item: Any) -> bool:  # noqa: ANN401
         return eqx.is_inexact_array(item)
 
```
{xax-0.2.5 → xax-0.2.7}/xax/utils/jaxpr.py

```diff
@@ -3,10 +3,10 @@
 from pathlib import Path
 
 import jax
-import jax.core
+import jax.extend.core
 
 
-def save_jaxpr_dot(closed_jaxpr: jax.core.ClosedJaxpr, filename: str | Path) -> None:
+def save_jaxpr_dot(closed_jaxpr: jax.extend.core.ClosedJaxpr, filename: str | Path) -> None:
     """Save the JAXPR to a DOT file.
 
     Example usage:
```
```diff
@@ -30,15 +30,15 @@ def save_jaxpr_dot(closed_jaxpr: jax.core.ClosedJaxpr, filename: str | Path) ->
     with open(filename, "w") as f:
         f.write("digraph Jaxpr {\n")
 
-        var_names: dict[jax.core.Var, str] = {}
+        var_names: dict[jax.extend.core.Var, str] = {}
         var_count = 0
 
-        def get_var_name(var: jax.core.Var) -> str:
+        def get_var_name(var: jax.extend.core.Var) -> str:
             """Get a unique name for a variable."""
             nonlocal var_names, var_count
 
             # Handle Literal objects specially since they're not hashable
-            if isinstance(var, jax.core.Literal):
+            if isinstance(var, jax.extend.core.Literal):
                 # Create a name based on the literal value
                 name = f"lit_{var.val}"
                 return name
```
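This migrates off the deprecated `jax.core` module to its `jax.extend.core` replacement; `jax.make_jaxpr` still returns the `ClosedJaxpr` that `save_jaxpr_dot` expects, so callers are unaffected. A minimal sketch:

```python
import jax
import jax.numpy as jnp

from xax.utils.jaxpr import save_jaxpr_dot


def fn(x: jax.Array) -> jax.Array:
    return jnp.sin(x) ** 2 + jnp.cos(x) ** 2


closed_jaxpr = jax.make_jaxpr(fn)(jnp.arange(3.0))
save_jaxpr_dot(closed_jaxpr, "fn.dot")  # render with: dot -Tpng fn.dot -o fn.png
```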
{xax-0.2.5 → xax-0.2.7}/xax/utils/pytree.py

```diff
@@ -57,7 +57,7 @@ def pytree_has_nans(pytree: PyTree) -> Array:
 
 def update_pytree(cond: Array, new: PyTree, original: PyTree) -> PyTree:
     """Update a pytree based on a condition."""
-    # Tricky, need use
+    # Tricky, need use tree.map because where expects array leafs.
     return jax.tree.map(lambda x, y: jnp.where(cond, x, y), new, original)
 
 
```
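The corrected comment captures the point: `jnp.where` only accepts arrays, so the condition has to be mapped over paired leaves rather than applied to the pytrees directly. The pattern in isolation:

```python
import jax
import jax.numpy as jnp

new = {"w": jnp.ones(3), "b": jnp.zeros(2)}
original = {"w": jnp.full(3, 5.0), "b": jnp.full(2, 7.0)}
cond = jnp.array(False)  # e.g. True when the new pytree is NaN-free

# Leaf-wise select: every leaf comes from `new` if cond is true, else `original`.
merged = jax.tree.map(lambda x, y: jnp.where(cond, x, y), new, original)
print(merged["w"])  # [5. 5. 5.] since cond is False
```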
{xax-0.2.5 → xax-0.2.7}/xax/utils/types/frozen_dict.py

```diff
@@ -138,7 +138,7 @@ class FrozenDict(Mapping[K, V]):
 
 def unfreeze(x: FrozenDict[K, V] | dict[str, Any]) -> dict[Any, Any]:  # noqa: ANN401
     if isinstance(x, FrozenDict):
-        return jax.
+        return jax.tree.map(lambda y: y, x._dict)
     elif isinstance(x, dict):
         ys = {}
         for key, value in x.items():
```
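`unfreeze` hands back a plain, mutable dict; the identity `jax.tree.map` produces a structural copy rather than aliasing the frozen internals. A short sketch (assuming `FrozenDict` can be constructed from a plain mapping, as in the flax implementation it mirrors):

```python
import jax.numpy as jnp

from xax.utils.types.frozen_dict import FrozenDict, unfreeze

frozen = FrozenDict({"scale": jnp.ones(2)})
plain = unfreeze(frozen)      # plain dict, safe to mutate
plain["bias"] = jnp.zeros(2)  # item assignment would raise on the FrozenDict
```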