xax 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {xax-0.2.0/xax.egg-info → xax-0.2.2}/PKG-INFO +1 -1
  2. {xax-0.2.0 → xax-0.2.2}/xax/__init__.py +4 -1
  3. {xax-0.2.0 → xax-0.2.2}/xax/nn/geom.py +34 -0
  4. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/checkpointing.py +9 -12
  5. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/cpu_stats.py +12 -9
  6. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/gpu_stats.py +14 -11
  7. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/process.py +14 -8
  8. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/train.py +133 -19
  9. {xax-0.2.0 → xax-0.2.2/xax.egg-info}/PKG-INFO +1 -1
  10. {xax-0.2.0 → xax-0.2.2}/LICENSE +0 -0
  11. {xax-0.2.0 → xax-0.2.2}/MANIFEST.in +0 -0
  12. {xax-0.2.0 → xax-0.2.2}/README.md +0 -0
  13. {xax-0.2.0 → xax-0.2.2}/pyproject.toml +0 -0
  14. {xax-0.2.0 → xax-0.2.2}/setup.cfg +0 -0
  15. {xax-0.2.0 → xax-0.2.2}/setup.py +0 -0
  16. {xax-0.2.0 → xax-0.2.2}/xax/core/__init__.py +0 -0
  17. {xax-0.2.0 → xax-0.2.2}/xax/core/conf.py +0 -0
  18. {xax-0.2.0 → xax-0.2.2}/xax/core/state.py +0 -0
  19. {xax-0.2.0 → xax-0.2.2}/xax/nn/__init__.py +0 -0
  20. {xax-0.2.0 → xax-0.2.2}/xax/nn/embeddings.py +0 -0
  21. {xax-0.2.0 → xax-0.2.2}/xax/nn/equinox.py +0 -0
  22. {xax-0.2.0 → xax-0.2.2}/xax/nn/export.py +0 -0
  23. {xax-0.2.0 → xax-0.2.2}/xax/nn/functions.py +0 -0
  24. {xax-0.2.0 → xax-0.2.2}/xax/nn/losses.py +0 -0
  25. {xax-0.2.0 → xax-0.2.2}/xax/nn/norm.py +0 -0
  26. {xax-0.2.0 → xax-0.2.2}/xax/nn/parallel.py +0 -0
  27. {xax-0.2.0 → xax-0.2.2}/xax/nn/ssm.py +0 -0
  28. {xax-0.2.0 → xax-0.2.2}/xax/py.typed +0 -0
  29. {xax-0.2.0 → xax-0.2.2}/xax/requirements-dev.txt +0 -0
  30. {xax-0.2.0 → xax-0.2.2}/xax/requirements.txt +0 -0
  31. {xax-0.2.0 → xax-0.2.2}/xax/task/__init__.py +0 -0
  32. {xax-0.2.0 → xax-0.2.2}/xax/task/base.py +0 -0
  33. {xax-0.2.0 → xax-0.2.2}/xax/task/launchers/__init__.py +0 -0
  34. {xax-0.2.0 → xax-0.2.2}/xax/task/launchers/base.py +0 -0
  35. {xax-0.2.0 → xax-0.2.2}/xax/task/launchers/cli.py +0 -0
  36. {xax-0.2.0 → xax-0.2.2}/xax/task/launchers/single_process.py +0 -0
  37. {xax-0.2.0 → xax-0.2.2}/xax/task/logger.py +0 -0
  38. {xax-0.2.0 → xax-0.2.2}/xax/task/loggers/__init__.py +0 -0
  39. {xax-0.2.0 → xax-0.2.2}/xax/task/loggers/callback.py +0 -0
  40. {xax-0.2.0 → xax-0.2.2}/xax/task/loggers/json.py +0 -0
  41. {xax-0.2.0 → xax-0.2.2}/xax/task/loggers/state.py +0 -0
  42. {xax-0.2.0 → xax-0.2.2}/xax/task/loggers/stdout.py +0 -0
  43. {xax-0.2.0 → xax-0.2.2}/xax/task/loggers/tensorboard.py +0 -0
  44. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/__init__.py +0 -0
  45. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/artifacts.py +0 -0
  46. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/compile.py +0 -0
  47. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/data_loader.py +0 -0
  48. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/logger.py +0 -0
  49. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/runnable.py +0 -0
  50. {xax-0.2.0 → xax-0.2.2}/xax/task/mixins/step_wrapper.py +0 -0
  51. {xax-0.2.0 → xax-0.2.2}/xax/task/script.py +0 -0
  52. {xax-0.2.0 → xax-0.2.2}/xax/task/task.py +0 -0
  53. {xax-0.2.0 → xax-0.2.2}/xax/utils/__init__.py +0 -0
  54. {xax-0.2.0 → xax-0.2.2}/xax/utils/data/__init__.py +0 -0
  55. {xax-0.2.0 → xax-0.2.2}/xax/utils/data/collate.py +0 -0
  56. {xax-0.2.0 → xax-0.2.2}/xax/utils/debugging.py +0 -0
  57. {xax-0.2.0 → xax-0.2.2}/xax/utils/experiments.py +0 -0
  58. {xax-0.2.0 → xax-0.2.2}/xax/utils/jax.py +0 -0
  59. {xax-0.2.0 → xax-0.2.2}/xax/utils/jaxpr.py +0 -0
  60. {xax-0.2.0 → xax-0.2.2}/xax/utils/logging.py +0 -0
  61. {xax-0.2.0 → xax-0.2.2}/xax/utils/numpy.py +0 -0
  62. {xax-0.2.0 → xax-0.2.2}/xax/utils/profile.py +0 -0
  63. {xax-0.2.0 → xax-0.2.2}/xax/utils/pytree.py +0 -0
  64. {xax-0.2.0 → xax-0.2.2}/xax/utils/tensorboard.py +0 -0
  65. {xax-0.2.0 → xax-0.2.2}/xax/utils/text.py +0 -0
  66. {xax-0.2.0 → xax-0.2.2}/xax/utils/types/__init__.py +0 -0
  67. {xax-0.2.0 → xax-0.2.2}/xax/utils/types/frozen_dict.py +0 -0
  68. {xax-0.2.0 → xax-0.2.2}/xax/utils/types/hashable_array.py +0 -0
  69. {xax-0.2.0 → xax-0.2.2}/xax.egg-info/SOURCES.txt +0 -0
  70. {xax-0.2.0 → xax-0.2.2}/xax.egg-info/dependency_links.txt +0 -0
  71. {xax-0.2.0 → xax-0.2.2}/xax.egg-info/requires.txt +0 -0
  72. {xax-0.2.0 → xax-0.2.2}/xax.egg-info/top_level.txt +0 -0
{xax-0.2.0/xax.egg-info → xax-0.2.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.2.0
+Version: 0.2.2
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte
{xax-0.2.0 → xax-0.2.2}/xax/__init__.py

@@ -12,7 +12,7 @@ and running the update script:
 python -m scripts.update_api --inplace
 """
 
-__version__ = "0.2.0"
+__version__ = "0.2.2"
 
 # This list shouldn't be modified by hand; instead, run the update script.
 __all__ = [
@@ -44,6 +44,7 @@ __all__ = [
     "euler_to_quat",
     "get_projected_gravity_vector_from_quat",
     "quat_to_euler",
+    "quat_to_rotmat",
    "rotate_vector_by_quat",
     "cross_entropy",
     "cast_norm_type",
@@ -206,6 +207,7 @@ NAME_MAP: dict[str, str] = {
     "euler_to_quat": "nn.geom",
     "get_projected_gravity_vector_from_quat": "nn.geom",
     "quat_to_euler": "nn.geom",
+    "quat_to_rotmat": "nn.geom",
     "rotate_vector_by_quat": "nn.geom",
     "cross_entropy": "nn.losses",
     "cast_norm_type": "nn.norm",
@@ -369,6 +371,7 @@ if IMPORT_ALL or TYPE_CHECKING:
         euler_to_quat,
         get_projected_gravity_vector_from_quat,
         quat_to_euler,
+        quat_to_rotmat,
         rotate_vector_by_quat,
     )
     from xax.nn.losses import cross_entropy
{xax-0.2.0 → xax-0.2.2}/xax/nn/geom.py

@@ -177,3 +177,37 @@ def cubic_bezier_interpolation(y_start: Array, y_end: Array, x: Array) -> Array:
     y_diff = y_end - y_start
     bezier = x**3 + 3 * (x**2 * (1 - x))
     return y_start + y_diff * bezier
+
+
+def quat_to_rotmat(quat: Array, eps: float = 1e-6) -> Array:
+    """Converts a quaternion to a rotation matrix.
+
+    Args:
+        quat: The quaternion to convert, shape (*, 4).
+        eps: A small epsilon value to avoid division by zero.
+
+    Returns:
+        The rotation matrix, shape (*, 3, 3).
+    """
+    quat = quat / (jnp.linalg.norm(quat, axis=-1, keepdims=True) + eps)
+    w, x, y, z = jnp.split(quat, 4, axis=-1)
+
+    xx = 1 - 2 * (y * y + z * z)
+    xy = 2 * (x * y - z * w)
+    xz = 2 * (x * z + y * w)
+    yx = 2 * (x * y + z * w)
+    yy = 1 - 2 * (x * x + z * z)
+    yz = 2 * (y * z - x * w)
+    zx = 2 * (x * z - y * w)
+    zy = 2 * (y * z + x * w)
+    zz = 1 - 2 * (x * x + y * y)
+
+    # Corrected stacking: row-major order
+    return jnp.concatenate(
+        [
+            jnp.concatenate([xx, xy, xz], axis=-1)[..., None, :],
+            jnp.concatenate([yx, yy, yz], axis=-1)[..., None, :],
+            jnp.concatenate([zx, zy, zz], axis=-1)[..., None, :],
+        ],
+        axis=-2,
+    )
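
The new quat_to_rotmat normalizes a (w, x, y, z) quaternion and assembles the standard 3x3 rotation matrix row by row; per the __all__ change above it is re-exported at the top level. A minimal usage sketch (the rotation values are illustrative, not from the package):

    import jax.numpy as jnp
    import xax  # assumes xax >= 0.2.2, which re-exports quat_to_rotmat

    # A 90-degree rotation about the z-axis: quat = (cos(t/2), 0, 0, sin(t/2)).
    quat = jnp.array([jnp.cos(jnp.pi / 4), 0.0, 0.0, jnp.sin(jnp.pi / 4)])
    rotmat = xax.quat_to_rotmat(quat)  # shape (3, 3)

    # Rotating the x-axis should give (approximately) the y-axis.
    print(rotmat @ jnp.array([1.0, 0.0, 0.0]))  # ~[0.0, 1.0, 0.0]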
{xax-0.2.0 → xax-0.2.2}/xax/task/mixins/checkpointing.py

@@ -63,10 +63,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
 
     def get_init_ckpt_path(self) -> Path | None:
         if self._exp_dir is not None:
-            ckpt_path = self.get_ckpt_path()
-            if not ckpt_path.exists():
-                logger.warning("No checkpoint found in experiment directory: %s", ckpt_path)
-            else:
+            if (ckpt_path := self.get_ckpt_path()).exists():
                 return ckpt_path
         if self.config.load_from_ckpt_path is not None:
             ckpt_path = Path(self.config.load_from_ckpt_path)
@@ -86,7 +83,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
         return False
 
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -97,7 +94,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]: ...
 
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -106,7 +103,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> tuple[PyTree, State, Config]: ...
 
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -115,7 +112,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> PyTree: ...
 
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -124,7 +121,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> optax.GradientTransformation: ...
 
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -133,7 +130,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> optax.OptState: ...
 
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -141,14 +138,14 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> State: ...
 
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
         part: Literal["config"],
     ) -> Config: ...
 
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
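
Only the name changes in these hunks: the renamed load_ckpt_with_template still requires the caller to supply an eval-shape template for whichever part is loaded. A hedged sketch of direct use (task is a hypothetical instance of a class mixing in CheckpointingMixin; ckpt_path is an illustrative Path):

    import equinox as eqx
    import jax

    key = jax.random.PRNGKey(0)  # only used to build the shape template
    model_spec = eqx.filter_eval_shape(task.get_model, key)
    model, state, config = task.load_ckpt_with_template(
        ckpt_path,
        part="model_state_config",
        model_template=model_spec,
    )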
{xax-0.2.0 → xax-0.2.2}/xax/task/mixins/cpu_stats.py

@@ -218,33 +218,36 @@ class CPUStatsMonitor:
 class CPUStatsMixin(ProcessMixin[Config], LoggerMixin[Config], Generic[Config]):
     """Defines a task mixin for getting CPU statistics."""
 
-    _cpu_stats_monitor: CPUStatsMonitor
+    _cpu_stats_monitor: CPUStatsMonitor | None
 
     def __init__(self, config: Config) -> None:
         super().__init__(config)
 
-        self._cpu_stats_monitor = CPUStatsMonitor(
-            ping_interval=self.config.cpu_stats.ping_interval,
-            context=self._mp_ctx,
-            manager=self._mp_manager,
-        )
+        if (ctx := self.multiprocessing_context) is not None and (mgr := self.multiprocessing_manager) is not None:
+            self._cpu_stats_monitor = CPUStatsMonitor(self.config.cpu_stats.ping_interval, ctx, mgr)
+        else:
+            self._cpu_stats_monitor = None
 
     def on_training_start(self, state: State) -> State:
         state = super().on_training_start(state)
 
-        self._cpu_stats_monitor.start()
+        if (monitor := self._cpu_stats_monitor) is not None:
+            monitor.start()
         return state
 
     def on_training_end(self, state: State) -> State:
         state = super().on_training_end(state)
 
-        self._cpu_stats_monitor.stop()
+        if (monitor := self._cpu_stats_monitor) is not None:
+            monitor.stop()
         return state
 
     def on_step_start(self, state: State) -> State:
         state = super().on_step_start(state)
 
-        monitor = self._cpu_stats_monitor
+        if (monitor := self._cpu_stats_monitor) is None:
+            return state
+
         stats = monitor.get_if_set() if self.config.cpu_stats.only_log_once else monitor.get()
 
         if stats is not None:
{xax-0.2.0 → xax-0.2.2}/xax/task/mixins/gpu_stats.py

@@ -234,24 +234,27 @@ class GPUStatsMixin(ProcessMixin[Config], LoggerMixin[Config], Generic[Config]):
     def __init__(self, config: Config) -> None:
         super().__init__(config)
 
-        self._gpu_stats_monitor = None
-        if shutil.which("nvidia-smi") is not None:
-            self._gpu_stats_monitor = GPUStatsMonitor(
-                config.gpu_stats.ping_interval,
-                self._mp_ctx,
-                self._mp_manager,
-            )
+        if (
+            shutil.which("nvidia-smi") is not None
+            and (ctx := self.multiprocessing_context) is not None
+            and (mgr := self.multiprocessing_manager) is not None
+        ):
+            self._gpu_stats_monitor = GPUStatsMonitor(config.gpu_stats.ping_interval, ctx, mgr)
+        else:
+            self._gpu_stats_monitor = None
 
     def on_training_start(self, state: State) -> State:
         state = super().on_training_start(state)
-        if self._gpu_stats_monitor is not None:
-            self._gpu_stats_monitor.start()
+
+        if (monitor := self._gpu_stats_monitor) is not None:
+            monitor.start()
         return state
 
     def on_training_end(self, state: State) -> State:
         state = super().on_training_end(state)
-        if self._gpu_stats_monitor is not None:
-            self._gpu_stats_monitor.stop()
+
+        if (monitor := self._gpu_stats_monitor) is not None:
+            monitor.stop()
         return state
 
     def on_step_start(self, state: State) -> State:
{xax-0.2.0 → xax-0.2.2}/xax/task/mixins/process.py

@@ -20,6 +20,7 @@ logger: logging.Logger = logging.getLogger(__name__)
 @dataclass
 class ProcessConfig(BaseConfig):
     multiprocessing_context: str | None = field("spawn", help="The multiprocessing context to use")
+    disable_multiprocessing: bool = field(False, help="If set, disable multiprocessing")
 
 
 Config = TypeVar("Config", bound=ProcessConfig)
@@ -28,27 +29,32 @@ Config = TypeVar("Config", bound=ProcessConfig)
 class ProcessMixin(BaseTask[Config], Generic[Config]):
     """Defines a base trainer mixin for handling monitoring processes."""
 
-    _mp_ctx: BaseContext
-    _mp_manager: SyncManager
+    _mp_ctx: BaseContext | None
+    _mp_manager: SyncManager | None
 
     def __init__(self, config: Config) -> None:
         super().__init__(config)
 
-        self._mp_ctx = mp.get_context(config.multiprocessing_context)
-        self._mp_manager = self._mp_ctx.Manager()
+        if self.config.disable_multiprocessing:
+            self._mp_ctx = None
+            self._mp_manager = None
+        else:
+            self._mp_ctx = mp.get_context(config.multiprocessing_context)
+            self._mp_manager = self._mp_ctx.Manager()
 
     @property
-    def multiprocessing_context(self) -> BaseContext:
+    def multiprocessing_context(self) -> BaseContext | None:
         return self._mp_ctx
 
     @property
-    def multiprocessing_manager(self) -> SyncManager:
+    def multiprocessing_manager(self) -> SyncManager | None:
         return self._mp_manager
 
     def on_training_end(self, state: State) -> State:
         state = super().on_training_end(state)
 
-        self._mp_manager.shutdown()
-        self._mp_manager.join()
+        if self._mp_manager is not None:
+            self._mp_manager.shutdown()
+            self._mp_manager.join()
 
         return state
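
When the new flag is set, the context and manager are simply None, and the guards added in the CPU/GPU stats mixins above skip their monitor subprocesses entirely. A hedged sketch of opting out (MyConfig and MyTask are hypothetical user-defined subclasses whose config inherits from ProcessConfig):

    config = MyConfig(disable_multiprocessing=True)
    task = MyTask(config)

    assert task.multiprocessing_context is None
    assert task.multiprocessing_manager is None  # stats monitors will not start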
{xax-0.2.0 → xax-0.2.2}/xax/task/mixins/train.py

@@ -12,6 +12,7 @@ import time
 import traceback
 from abc import ABC, abstractmethod
 from dataclasses import asdict, dataclass, is_dataclass
+from pathlib import Path
 from threading import Thread
 from typing import (
     Any,
@@ -39,7 +40,7 @@ from xax.core.state import Phase, State
 from xax.nn.functions import set_random_seed
 from xax.nn.parallel import is_master
 from xax.task.mixins.artifacts import ArtifactsConfig, ArtifactsMixin
-from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin
+from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin, CheckpointPart
 from xax.task.mixins.data_loader import DataloadersConfig, DataloadersMixin
 from xax.task.mixins.logger import LoggerConfig, LoggerMixin
 from xax.task.mixins.runnable import RunnableConfig, RunnableMixin
@@ -54,7 +55,7 @@ from xax.utils.experiments import (
     get_training_code,
 )
 from xax.utils.jax import jit as xax_jit
-from xax.utils.logging import LOG_STATUS
+from xax.utils.logging import LOG_PING, LOG_STATUS
 from xax.utils.text import highlight_exception_message, show_info
 from xax.utils.types.frozen_dict import FrozenDict
 
@@ -340,12 +341,7 @@ class TrainMixin(
 
         if init_ckpt_path is not None:
             logger.info("Loading checkpoint from %s", init_ckpt_path)
-            model_spec = eqx.filter_eval_shape(self.get_model, key)
-            model, state, config = self.load_checkpoint(
-                init_ckpt_path,
-                part="model_state_config",
-                model_template=model_spec,
-            )
+            model, state, config = self.load_ckpt(init_ckpt_path, part="model_state_config")
             config_diff = get_diff_string(diff_configs(asdict(config), asdict(self.config)))
             if config_diff:
                 logger.warning("Loaded config differs from current config:\n%s", config_diff)
@@ -353,17 +349,11 @@
             if not load_optimizer:
                 return model, state
 
-            # Loads the optimizer.
-            optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
-            optimizer = self.load_checkpoint(init_ckpt_path, part="opt", optimizer_template=optimizer_spec)
-
-            # Loads the optimizer state.
-            opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
-            opt_state = self.load_checkpoint(init_ckpt_path, part="opt_state", opt_state_template=opt_state_spec)
-
+            optimizer = self.load_ckpt(init_ckpt_path, part="opt")
+            opt_state = self.load_ckpt(init_ckpt_path, part="opt_state", model=model, optimizer=optimizer)
             return model, optimizer, opt_state, state
 
-        logger.info("No checkpoint found. Initializing a new model.")
+        logger.info("Starting a new training run")
         model = self.get_model(key)
         state = State.init_state()
@@ -375,6 +365,131 @@ class TrainMixin(
 
         return model, optimizer, opt_state, state
 
+    @overload
+    def load_ckpt(
+        self,
+        path: Path,
+        *,
+        part: Literal["all"],
+    ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]: ...
+
+    @overload
+    def load_ckpt(
+        self,
+        path: Path,
+        *,
+        part: Literal["model_state_config"],
+    ) -> tuple[PyTree, State, Config]: ...
+
+    @overload
+    def load_ckpt(
+        self,
+        path: Path,
+        *,
+        part: Literal["model"],
+    ) -> PyTree: ...
+
+    @overload
+    def load_ckpt(
+        self,
+        path: Path,
+        *,
+        part: Literal["opt"],
+    ) -> optax.GradientTransformation: ...
+
+    @overload
+    def load_ckpt(
+        self,
+        path: Path,
+        *,
+        part: Literal["opt_state"],
+        model: PyTree | None = None,
+        optimizer: optax.GradientTransformation | None = None,
+    ) -> optax.OptState: ...
+
+    @overload
+    def load_ckpt(
+        self,
+        path: Path,
+        *,
+        part: Literal["state"],
+    ) -> State: ...
+
+    @overload
+    def load_ckpt(
+        self,
+        path: Path,
+        *,
+        part: Literal["config"],
+    ) -> Config: ...
+
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: CheckpointPart = "all",
+        model: PyTree | None = None,
+        optimizer: optax.GradientTransformation | None = None,
+    ) -> (
+        tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]
+        | tuple[PyTree, State, Config]
+        | PyTree
+        | optax.GradientTransformation
+        | optax.OptState
+        | State
+        | Config
+    ):
+        path = Path(path)
+
+        # This key isn't used for anything, it's just a required argument.
+        key = jax.random.PRNGKey(0)
+
+        match part:
+            case "model_state_config":
+                model_spec = eqx.filter_eval_shape(self.get_model, key)
+                return self.load_ckpt_with_template(path, part="model_state_config", model_template=model_spec)
+
+            case "model":
+                model_spec = eqx.filter_eval_shape(self.get_model, key)
+                return self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+
+            case "config":
+                return self.load_ckpt_with_template(path, part="config")
+
+            case "opt":
+                optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
+                return self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+
+            case "opt_state":
+                if model is None:
+                    model_spec = eqx.filter_eval_shape(self.get_model, key)
+                    model = self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+                if optimizer is None:
+                    optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
+                    optimizer = self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
+                return self.load_ckpt_with_template(path, part="opt_state", opt_state_template=opt_state_spec)
+
+            case "state":
+                return self.load_ckpt_with_template(path, part="state")
+
+            case "config":
+                return self.load_ckpt_with_template(path, part="config")
+
+            case "all":
+                model_spec = eqx.filter_eval_shape(self.get_model, key)
+                model = self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+                optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
+                optimizer = self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
+                opt_state = self.load_ckpt_with_template(path, part="opt_state", opt_state_template=opt_state_spec)
+                state = self.load_ckpt_with_template(path, part="state")
+                config = self.load_ckpt_with_template(path, part="config")
+                return model, optimizer, opt_state, state, config
+
+            case _:
+                raise ValueError(f"Unknown checkpoint part: {part}")
+
     def get_output(self, model: PyTree, batch: Batch, state: State) -> Output:
         """Gets the output from the model.
@@ -529,8 +644,7 @@ class TrainMixin(
             self._last_printed_remaining_time = state.elapsed_time_s
             remaining_seconds = remaining_percent * state.elapsed_time_s / (1 - remaining_percent)
             termination_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time() + remaining_seconds))
-            # logger.info("Estimated finish time: %s", termination_time)
-            jax.debug.print("Estimated finish time: {}", termination_time)
+            logger.log(LOG_PING, "Estimated finish time: %s", termination_time)
 
     def get_remaining_percent(self, state: State) -> float | None:
         if self.config.max_steps is None:
{xax-0.2.0 → xax-0.2.2/xax.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.2.0
+Version: 0.2.2
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte