xax 0.0.7__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xax/task/logger.py CHANGED
@@ -223,10 +223,29 @@ class LogVideo:
     fps: int


+@dataclass(kw_only=True)
+class LogDistribution:
+    mean: Number
+    std: Number
+
+
+@dataclass(kw_only=True)
+class LogHistogram:
+    min: Number
+    max: Number
+    num: int
+    sum: Number
+    sum_squares: Number
+    bucket_limits: list[Number]
+    bucket_counts: list[int]
+
+
 @dataclass(kw_only=True)
 class LogLine:
     state: State
     scalars: dict[str, dict[str, Number]]
+    distributions: dict[str, dict[str, LogDistribution]]
+    histograms: dict[str, dict[str, LogHistogram]]
     strings: dict[str, dict[str, str]]
     images: dict[str, dict[str, LogImage]]
     videos: dict[str, dict[str, LogVideo]]
@@ -329,9 +348,9 @@ def image_with_text(
     else:
         text = text[:max_num_lines]
     width, height = image.size
-    font: ImageFont.ImageFont = ImageFont.load_default()
+    font: ImageFont.ImageFont | ImageFont.FreeTypeFont = ImageFont.load_default()
     _, _, _, line_height = font.getbbox(text[0])
-    new_width, new_height = width, height + line_spacing + max_num_lines * (line_height + line_spacing)
+    new_width, new_height = width, int(height + line_spacing + max_num_lines * (line_height + line_spacing))
     padded_image = Image.new(image.mode, (new_width, new_height), 255)
     padded_image.paste(image, (0, 0))
     drawer = ImageDraw.Draw(padded_image)
@@ -497,6 +516,8 @@ class Logger:

     def __init__(self, default_namespace: str = DEFAULT_NAMESPACE) -> None:
         self.scalars: dict[str, dict[str, Callable[[], Number]]] = defaultdict(dict)
+        self.distributions: dict[str, dict[str, Callable[[], LogDistribution]]] = defaultdict(dict)
+        self.histograms: dict[str, dict[str, Callable[[], LogHistogram]]] = defaultdict(dict)
         self.strings: dict[str, dict[str, Callable[[], str]]] = defaultdict(dict)
         self.images: dict[str, dict[str, Callable[[], LogImage]]] = defaultdict(dict)
         self.videos: dict[str, dict[str, Callable[[], LogVideo]]] = defaultdict(dict)
@@ -522,6 +543,8 @@ class Logger:
         return LogLine(
             state=state,
             scalars={k: {kk: v() for kk, v in v.items()} for k, v in self.scalars.items()},
+            distributions={k: {kk: v() for kk, v in v.items()} for k, v in self.distributions.items()},
+            histograms={k: {kk: v() for kk, v in v.items()} for k, v in self.histograms.items()},
             strings={k: {kk: v() for kk, v in v.items()} for k, v in self.strings.items()},
             images={k: {kk: v() for kk, v in v.items()} for k, v in self.images.items()},
             videos={k: {kk: v() for kk, v in v.items()} for k, v in self.videos.items()},
@@ -529,6 +552,8 @@ class Logger:

     def clear(self) -> None:
         self.scalars.clear()
+        self.distributions.clear()
+        self.histograms.clear()
         self.strings.clear()
         self.images.clear()
         self.videos.clear()
@@ -612,6 +637,76 @@ class Logger:

         self.scalars[namespace][key] = scalar_future

+    def log_distribution(
+        self,
+        key: str,
+        value: Callable[[], tuple[Number, Number]] | tuple[Number, Number],
+        *,
+        namespace: str | None = None,
+    ) -> None:
+        """Logs a distribution value.
+
+        Args:
+            key: The key being logged
+            value: The distribution value being logged, a tuple of (mean, std)
+            namespace: An optional logging namespace
+        """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
+        namespace = self.resolve_namespace(namespace)
+
+        @functools.lru_cache(maxsize=None)
+        def distribution_future() -> LogDistribution:
+            mean, std = value() if callable(value) else value
+            return LogDistribution(mean=mean, std=std)
+
+        self.distributions[namespace][key] = distribution_future
+
+    def log_histogram(
+        self,
+        key: str,
+        value: Callable[[], np.ndarray | Array] | np.ndarray | Array,
+        *,
+        bins: int = 100,
+        namespace: str | None = None,
+    ) -> None:
+        """Logs a histogram value.
+
+        Args:
+            key: The key being logged
+            value: The histogram value being logged
+            bins: The number of bins to use for the histogram
+            namespace: An optional logging namespace
+        """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
+        namespace = self.resolve_namespace(namespace)
+
+        @functools.lru_cache(maxsize=None)
+        def histogram_future() -> LogHistogram:
+            values = value() if callable(value) else value
+            values = values.reshape(-1)  # Must be flat.
+
+            if isinstance(values, Array):
+                counts, limits = jnp.histogram(values, bins=bins)
+                counts, limits = as_numpy(counts), as_numpy(limits)
+            elif isinstance(values, np.ndarray):
+                counts, limits = np.histogram(values, bins=bins)
+            else:
+                raise ValueError(f"Unsupported histogram type: {type(values)}")
+
+            return LogHistogram(
+                min=float(values.min()),
+                max=float(values.max()),
+                num=int(values.size),
+                sum=float(values.sum()),
+                sum_squares=float(values.dot(values)),
+                bucket_limits=limits[1:].tolist(),
+                bucket_counts=counts.tolist(),
+            )
+
+        self.histograms[namespace][key] = histogram_future
+
     def log_string(self, key: str, value: Callable[[], str] | str, *, namespace: str | None = None) -> None:
         """Logs a string value.

@@ -33,7 +33,7 @@ class StdoutLogger(LoggerImpl):
         self,
         write_fp: TextIO = sys.stdout,
         precision: int = 4,
-        log_timers: bool = False,
+        log_timers: bool = True,
         log_perf: bool = False,
         log_optim: bool = False,
         log_fp: bool = False,
@@ -98,7 +98,7 @@ class StdoutLogger(LoggerImpl):

         def add_logs(log: dict[str, dict[str, Any]], namespace_to_lines: dict[str, dict[str, str]]) -> None:
             for namespace, values in log.items():
-                if not self.log_timers and namespace.startswith(""):
+                if not self.log_timers and namespace.startswith(""):
                     continue
                 if not self.log_perf and namespace.startswith("🔧"):
                     continue
@@ -1,11 +1,9 @@
 """Defines a Tensorboard logger backend."""

 import atexit
-import functools
 import logging
 import os
 import re
-import shutil
 import subprocess
 import threading
 import time
@@ -140,15 +138,6 @@ class TensorboardLogger(LoggerImpl):
     def __del__(self) -> None:
         self.cleanup()

-    @functools.lru_cache(None)  # Avoid clearing logs multiple times.
-    def clear_logs(self) -> None:
-        if not self.log_directory.exists():
-            return
-        if not any(child.is_dir() for child in self.log_directory.iterdir()):
-            return
-        logger.warning("Clearing TensorBoard logs")
-        shutil.rmtree(self.log_directory)
-
     def get_writer(self, phase: Phase) -> TensorboardWriter:
         self._start()
         return self.writers.writer(phase)
@@ -162,9 +151,6 @@
         if not is_master():
             return

-        if line.state.num_steps == 0:
-            self.clear_logs()
-
         writer = self.get_writer(line.state.phase)
         walltime = line.state.start_time_s + line.state.elapsed_time_s

@@ -177,6 +163,31 @@
                 walltime=walltime,
             )

+        for namespace, distributions in line.distributions.items():
+            for distribution_key, distribution_value in distributions.items():
+                writer.add_gaussian_distribution(
+                    f"{namespace}/{distribution_key}",
+                    mean=float(distribution_value.mean),
+                    std=float(distribution_value.std),
+                    global_step=line.state.num_steps,
+                    walltime=walltime,
+                )
+
+        for namespace, histograms in line.histograms.items():
+            for histogram_key, histogram_value in histograms.items():
+                writer.add_histogram_raw(
+                    f"{namespace}/{histogram_key}",
+                    min=float(histogram_value.min),
+                    max=float(histogram_value.max),
+                    num=int(histogram_value.num),
+                    sum=float(histogram_value.sum),
+                    sum_squares=float(histogram_value.sum_squares),
+                    bucket_limits=[float(x) for x in histogram_value.bucket_limits],
+                    bucket_counts=[int(x) for x in histogram_value.bucket_counts],
+                    global_step=line.state.num_steps,
+                    walltime=walltime,
+                )
+
         for namespace, strings in line.strings.items():
             for string_key, string_value in strings.items():
                 writer.add_text(
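
A hedged sketch of how a custom backend could consume the new LogLine fields, mirroring the Tensorboard handling above. It assumes the LoggerImpl.write(line) hook that the built-in backends implement (other LoggerImpl methods are omitted) and only touches fields defined by the LogDistribution and LogHistogram dataclasses in this release.

from xax.task.logger import LoggerImpl, LogLine

class PrintingLogger(LoggerImpl):
    def write(self, line: LogLine) -> None:
        # Distributions carry a mean and standard deviation per key.
        for namespace, distributions in line.distributions.items():
            for key, dist in distributions.items():
                print(f"{namespace}/{key}: mean={dist.mean:.4f} std={dist.std:.4f}")
        # Histograms carry summary statistics plus bucket limits and counts.
        for namespace, histograms in line.histograms.items():
            for key, hist in histograms.items():
                print(f"{namespace}/{key}: n={hist.num} min={hist.min:.4f} max={hist.max:.4f}")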
@@ -3,7 +3,6 @@
 import functools
 import inspect
 import logging
-import os
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Self, TypeVar
@@ -54,20 +53,6 @@ class ArtifactsMixin(BaseTask[Config]):
         self._exp_dir = Path(exp_dir).expanduser().resolve()
         return self

-    def add_lock_file(self, lock_type: str, *, exists_ok: bool = False) -> None:
-        if (lock_file := self.exp_dir / f".lock_{lock_type}").exists():
-            if not exists_ok:
-                raise RuntimeError(f"Lock file already exists at {lock_file}")
-        else:
-            with open(lock_file, "w", encoding="utf-8") as f:
-                f.write(f"PID: {os.getpid()}")
-
-    def remove_lock_file(self, lock_type: str, *, missing_ok: bool = False) -> None:
-        if (lock_file := self.exp_dir / f".lock_{lock_type}").exists():
-            lock_file.unlink()
-        elif not missing_ok:
-            raise RuntimeError(f"Lock file not found at {lock_file}")
-
     def get_exp_dir(self) -> Path:
         if self._exp_dir is not None:
             return self._exp_dir
@@ -82,13 +67,8 @@ class ArtifactsMixin(BaseTask[Config]):
         def get_exp_dir(run_id: int) -> Path:
             return self.run_dir / f"run_{run_id}"

-        def has_lock_file(exp_dir: Path, lock_type: str | None = None) -> bool:
-            if lock_type is not None:
-                return (exp_dir / f".lock_{lock_type}").exists()
-            return any(exp_dir.glob(".lock_*"))
-
         run_id = 0
-        while (exp_dir := get_exp_dir(run_id)).is_dir() and has_lock_file(exp_dir):
+        while (exp_dir := get_exp_dir(run_id)).is_dir():
             run_id += 1
         exp_dir.mkdir(exist_ok=True, parents=True)
         self._exp_dir = exp_dir.expanduser().resolve()
@@ -21,7 +21,7 @@ from xax.task.mixins.artifacts import ArtifactsConfig, ArtifactsMixin

 logger = logging.getLogger(__name__)

-CheckpointPart = Literal["model", "opt", "opt_state", "state", "config"]
+CheckpointPart = Literal["model", "opt", "opt_state", "state", "config", "model_state_config", "all"]


 def get_ckpt_path(exp_dir: Path, state: State | None = None) -> Path:
@@ -88,8 +88,16 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     def load_checkpoint(
         self,
         path: Path,
+        part: Literal["all"] = "all",
     ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, DictConfig]: ...

+    @overload
+    def load_checkpoint(
+        self,
+        path: Path,
+        part: Literal["model_state_config"] = "model_state_config",
+    ) -> tuple[PyTree, State, DictConfig]: ...
+
     @overload
     def load_checkpoint(self, path: Path, part: Literal["model"]) -> PyTree: ...

@@ -108,15 +116,19 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     def load_checkpoint(
         self,
         path: Path,
-        part: CheckpointPart | None = None,
+        part: CheckpointPart = "all",
     ) -> (
         tuple[PyTree, optax.GradientTransformation, optax.OptState, State, DictConfig]
+        | tuple[PyTree, State, DictConfig]
         | PyTree
         | optax.GradientTransformation
         | optax.OptState
         | State
         | DictConfig
     ):
+        # Calls the base callback.
+        self.on_before_checkpoint_load(path)
+
         with tarfile.open(path, "r:gz") as tar:

             def get_model() -> PyTree:
@@ -155,7 +167,9 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
                     return get_state()
                 case "config":
                     return get_config()
-                case None:
+                case "model_state_config":
+                    return get_model(), get_state(), get_config()
+                case "all":
                     return get_model(), get_opt(), get_opt_state(), get_state(), get_config()
                 case _:
                     raise ValueError(f"Invalid checkpoint part: {part}")
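
The new "model_state_config" and "all" parts change what load_checkpoint returns. An illustrative call pattern (task stands for any CheckpointingMixin task and ckpt_path for an existing checkpoint archive; both names are placeholders):

model, optimizer, opt_state, state, config = task.load_checkpoint(ckpt_path)  # part="all" is now the default
model, state, config = task.load_checkpoint(ckpt_path, "model_state_config")  # skips the optimizer
model = task.load_checkpoint(ckpt_path, "model")  # single-part loads are unchanged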
@@ -215,7 +229,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
         except FileExistsError:
             logger.exception("Exception while trying to update %s", ckpt_path)

-        # Marks directory as having artifacts which shouldn't be overwritten.
-        self.add_lock_file("ckpt", exists_ok=True)
+        # Calls the base callback.
+        self.on_after_checkpoint_save(ckpt_path, state)

         return ckpt_path
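
Lock files are gone; checkpoint saving and loading now flow through overridable callbacks. A hedged sketch of hooking them in a task (the class name is invented and the signatures are inferred from the call sites above):

from pathlib import Path

from xax.core.state import State

class CheckpointLoggingMixin:
    # Mixed into a task alongside CheckpointingMixin; reports checkpoint movement.
    def on_before_checkpoint_load(self, path: Path) -> None:
        print(f"loading checkpoint from {path}")

    def on_after_checkpoint_save(self, ckpt_path: Path, state: State) -> None:
        print(f"saved checkpoint to {ckpt_path} at step {state.num_steps}")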
xax/task/mixins/logger.py CHANGED
@@ -8,6 +8,7 @@ from typing import Generic, Self, TypeVar

 import jax

+from xax.core.conf import field
 from xax.core.state import State
 from xax.task.base import BaseConfig, BaseTask
 from xax.task.logger import Logger, LoggerImpl
@@ -22,7 +23,14 @@ from xax.utils.text import is_interactive_session
 @jax.tree_util.register_dataclass
 @dataclass
 class LoggerConfig(BaseConfig):
-    pass
+    log_interval_seconds: float = field(
+        value=1.0,
+        help="The interval between successive log lines.",
+    )
+    tensorboard_log_interval_seconds: float = field(
+        value=10.0,
+        help="The interval between successive Tensorboard log lines.",
+    )


 Config = TypeVar("Config", bound=LoggerConfig)
@@ -49,11 +57,27 @@ class LoggerMixin(BaseTask[Config], Generic[Config]):
         self.logger.add_logger(*logger)

     def set_loggers(self) -> None:
-        self.add_logger(StdoutLogger() if is_interactive_session() else JsonLogger())
+        self.add_logger(
+            StdoutLogger(
+                log_interval_seconds=self.config.log_interval_seconds,
+            )
+            if is_interactive_session()
+            else JsonLogger(
+                log_interval_seconds=self.config.log_interval_seconds,
+            )
+        )
+
+        # If this is also an ArtifactsMixin, we should default add some
+        # additional loggers which log data to the artifacts directory.
         if isinstance(self, ArtifactsMixin):
             self.add_logger(
-                StateLogger(self.exp_dir),
-                TensorboardLogger(self.exp_dir),
+                StateLogger(
+                    run_directory=self.exp_dir,
+                ),
+                TensorboardLogger(
+                    run_directory=self.exp_dir,
+                    log_interval_seconds=self.config.tensorboard_log_interval_seconds,
+                ),
             )

     def write_logs(self, state: State) -> None:
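
For tasks that use LoggerMixin, the task config derives from LoggerConfig, so the logging cadence can now be tuned per task. A minimal sketch, assuming the config subclass shown here is the task's own Config (the name is illustrative):

from dataclasses import dataclass

from xax.core.conf import field
from xax.task.mixins.logger import LoggerConfig

@dataclass
class MyTaskConfig(LoggerConfig):
    # Write stdout/JSON log lines at most every 5 seconds and Tensorboard lines every 30.
    log_interval_seconds: float = field(value=5.0, help="Interval between log lines.")
    tensorboard_log_interval_seconds: float = field(value=30.0, help="Interval between Tensorboard log lines.")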
@@ -1,53 +1,39 @@
 """Defines a mixin to wrap some steps in a context manager."""

+import time
 from dataclasses import dataclass
 from types import TracebackType
-from typing import ContextManager, Literal, TypeVar
+from typing import Callable, ContextManager, TypeVar

-import equinox as eqx
 import jax

 from xax.task.base import BaseConfig, BaseTask

-StepType = Literal[
-    "backward",
-    "change_mode",
-    "clip_grads",
-    "create_optimizers",
-    "forward",
-    "get_dataloader",
-    "get_dataset",
-    "get_prefetcher",
-    "get_model",
-    "get_optimizer",
-    "get_initial_opt_state",
-    "get_update_fn",
-    "load_checkpoint",
-    "log_losses",
-    "model_to_device",
-    "on_step_end",
-    "on_step_start",
-    "save_checkpoint",
-    "step",
-    "update_state",
-    "write_logs",
-    "zero_grads",
-]
-

 class StepContext(ContextManager):
     """Context manager to get the current step type."""

-    CURRENT_STEP: StepType | None = None
+    CURRENT_STEP: str | None = None

-    def __init__(self, step: StepType) -> None:
+    def __init__(
+        self,
+        step: str,
+        on_context_start: Callable[[str], None],
+        on_context_end: Callable[[str, float], None],
+    ) -> None:
         self.step = step
+        self.start_time = 0.0
+        self.on_context_start = on_context_start
+        self.on_context_end = on_context_end

     def __enter__(self) -> None:
         StepContext.CURRENT_STEP = self.step
+        self.start_time = time.time()
+        self.on_context_start(self.step)

     def __exit__(self, _t: type[BaseException] | None, _e: BaseException | None, _tr: TracebackType | None) -> None:
         StepContext.CURRENT_STEP = None
+        self.on_context_end(self.step, time.time() - self.start_time)


 @jax.tree_util.register_dataclass
@@ -63,6 +49,11 @@ class StepContextMixin(BaseTask[Config]):
     def __init__(self, config: Config) -> None:
         super().__init__(config)

-    @eqx.filter_jit
-    def step_context(self, step: StepType) -> ContextManager:
-        return StepContext(step)
+    def step_context(self, step: str) -> ContextManager:
+        return StepContext(step, self.on_context_start, self.on_context_stop)
+
+    def on_context_start(self, step: str) -> None:
+        pass
+
+    def on_context_stop(self, step: str, elapsed_time: float) -> None:
+        pass
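
Step contexts now report their wall-clock duration through these two no-op hooks. A hedged sketch of an override (the hook names and signatures come from the diff; the timing dict and class name are made up, and a real mixin would also forward to super().__init__):

import logging
from collections import defaultdict

logger = logging.getLogger(__name__)

class StepTimingMixin:
    # Mixed into a task alongside StepContextMixin to accumulate per-step timings.
    def __init__(self) -> None:
        self.step_times: dict[str, float] = defaultdict(float)

    def on_context_start(self, step: str) -> None:
        logger.debug("Entering step %s", step)

    def on_context_stop(self, step: str, elapsed_time: float) -> None:
        self.step_times[step] += elapsed_time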
xax/task/mixins/train.py CHANGED
@@ -24,6 +24,7 @@ from typing import (
     TypeVar,
     cast,
     get_args,
+    overload,
 )

 import equinox as eqx
@@ -35,6 +36,7 @@ from omegaconf import DictConfig

 from xax.core.conf import field
 from xax.core.state import Phase, State
+from xax.nn.functions import set_random_seed
 from xax.nn.parallel import is_master
 from xax.task.mixins.artifacts import ArtifactsConfig, ArtifactsMixin
 from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin
@@ -115,7 +117,7 @@ class ValidStepTimer:
         if self.last_valid_time is None or self.last_valid_step is None:
             self.last_valid_time = state.elapsed_time_s
             self.last_valid_step = state.num_steps
-            return True
+            return False

         # Step-based validation.
         valid_every_n_steps = self.valid_every_n_steps
@@ -183,6 +185,9 @@ class TrainMixin(
     def __init__(self, config: Config) -> None:
         super().__init__(config)

+        # Sets the random seed whenever we instantiate a new train mixin.
+        set_random_seed(self.config.random_seed)
+
         # Timer for validation steps.
         self.valid_step_timer = ValidStepTimer(
             valid_every_n_steps=config.valid_every_n_steps,
@@ -279,31 +284,53 @@ class TrainMixin(
     def get_initial_opt_state(self, model: PyTree, optimizer: optax.GradientTransformation) -> optax.OptState:
         return optimizer.init(eqx.filter(model, eqx.is_array))

+    @overload
+    def load_initial_state(
+        self,
+        key: PRNGKeyArray,
+        load_optimizer: Literal[False] = False,
+    ) -> tuple[PyTree, State]: ...
+
+    @overload
     def load_initial_state(
         self,
         key: PRNGKeyArray,
-    ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State]:
+        load_optimizer: Literal[True],
+    ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State]: ...
+
+    def load_initial_state(
+        self,
+        key: PRNGKeyArray,
+        load_optimizer: bool = False,
+    ) -> tuple[PyTree, State] | tuple[PyTree, optax.GradientTransformation, optax.OptState, State]:
         init_ckpt_path = self.get_init_ckpt_path()

         if init_ckpt_path is not None:
             logger.info("Loading checkpoint from %s", init_ckpt_path)
-            with self.step_context("load_checkpoint"):
+            if load_optimizer:
                 model, optimizer, opt_state, state, config = self.load_checkpoint(init_ckpt_path)
                 config_diff = get_diff_string(diff_configs(config, cast(DictConfig, self.config)))
                 if config_diff:
                     logger.warning("Loaded config differs from current config:\n%s", config_diff)
                 return model, optimizer, opt_state, state

-        with self.step_context("get_model"):
-            model = self.get_model(key)
+            else:
+                model, state, config = self.load_checkpoint(init_ckpt_path, "model_state_config")
+                config_diff = get_diff_string(diff_configs(config, cast(DictConfig, self.config)))
+                if config_diff:
+                    logger.warning("Loaded config differs from current config:\n%s", config_diff)
+                return model, state
+
+        model = self.get_model(key)
+        state = State.init_state()

-        with self.step_context("get_optimizer"):
-            optimizer = self.get_optimizer()
+        if not load_optimizer:
+            return model, state

-        with self.step_context("get_initial_opt_state"):
-            opt_state = self.get_initial_opt_state(model, optimizer)
+        optimizer = self.get_optimizer()
+        opt_state = self.get_initial_opt_state(model, optimizer)

-        return model, optimizer, opt_state, State.init_state()
+        return model, optimizer, opt_state, state

     @eqx.filter_jit
     def get_output(self, model: PyTree, batch: Batch) -> Output:
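
The overloads above make the return shape depend on load_optimizer. A short illustrative call (task stands in for any TrainMixin task; it is a placeholder name):

import jax

key = jax.random.PRNGKey(0)
model, state = task.load_initial_state(key)  # default: model and state only
model, optimizer, opt_state, state = task.load_initial_state(key, load_optimizer=True)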
@@ -424,6 +451,7 @@
     def log_state(self) -> None:
         logger.log(LOG_STATUS, self.task_path)
         logger.log(LOG_STATUS, self.task_name)
+        logger.log(LOG_STATUS, "JAX devices: %s", jax.devices())
         self.logger.log_file("git_state.txt", get_git_state(self))
         self.logger.log_file("training_code.txt", get_training_code(self))
         self.logger.log_file("config.yaml", self.config_str(self.config, use_cli=False))
@@ -456,7 +484,8 @@
             while not self.is_training_over(state):
                 if self.valid_step_timer.is_valid_step(state):
                     valid_batch = next(valid_pf)
-                    model, loss, output = self.val_step(model, valid_batch)
+                    with self.step_context("model_step"):
+                        model, loss, output = self.val_step(model, valid_batch)

                     # Perform logging.
                     with self.step_context("write_logs"):
@@ -464,22 +493,19 @@
                         self.log_step(model, valid_batch, output, loss, state)
                         state.num_valid_samples += 1

-                with self.step_context("on_step_start"):
-                    state = self.on_step_start(state)
+                state = self.on_step_start(state)

-                with self.step_context("update_state"):
+                with self.step_context("model_step"):
                     train_batch = next(train_pf)
                     model, opt_state, loss, output = self.train_step(model, optimizer, opt_state, train_batch)

-                # Perform logging.
                 with self.step_context("write_logs"):
                     state.phase = "train"
                     self.log_step(model, train_batch, output, loss, state)
                     state.num_steps += 1
                     state.num_samples += self.get_size_of_batch(train_batch) or 0

-                with self.step_context("on_step_end"):
-                    state = self.on_step_end(state)
+                state = self.on_step_end(state)

                 if self.should_checkpoint(state):
                     self.save_checkpoint(model, optimizer, opt_state, state)
@@ -496,14 +522,9 @@
             except NotImplementedError:
                 pass

-        with self.step_context("get_dataset"):
-            train_ds = self.get_dataset("train")
-
-        with self.step_context("get_dataloader"):
-            train_dl = self.get_dataloader(train_ds, "train")
-
-        with self.step_context("get_prefetcher"):
-            train_pf = self.get_prefetcher(train_dl)
+        train_ds = self.get_dataset("train")
+        train_dl = self.get_dataloader(train_ds, "train")
+        train_pf = self.get_prefetcher(train_dl)

         try:
             with train_pf as train_pf_ctx:
@@ -520,14 +541,9 @@
             except NotImplementedError:
                 pass

-        with self.step_context("get_dataset"):
-            valid_ds = self.get_dataset("valid")
-
-        with self.step_context("get_dataloader"):
-            valid_dl = self.get_dataloader(valid_ds, "valid")
-
-        with self.step_context("get_prefetcher"):
-            valid_pf = self.get_prefetcher(valid_dl)
+        valid_ds = self.get_dataset("valid")
+        valid_dl = self.get_dataloader(valid_ds, "valid")
+        valid_pf = self.get_prefetcher(valid_dl)

         try:
             with valid_pf as valid_pf_ctx:
@@ -559,7 +575,7 @@
             Thread(target=self.log_state, daemon=True).start()

             key, model_key = jax.random.split(key)
-            model, optimizer, opt_state, state = self.load_initial_state(model_key)
+            model, optimizer, opt_state, state = self.load_initial_state(model_key, load_optimizer=True)
             state = self.on_training_start(state)

             def on_exit() -> None: