PyPI - xax - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

xax 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

xax/__init__.py +4 -1
xax/nn/geom.py +34 -0
xax/task/mixins/checkpointing.py +9 -12
xax/task/mixins/train.py +133 -19
{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/METADATA +1 -1
{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/RECORD +9 -9
{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/WHEEL +0 -0
{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/licenses/LICENSE +0 -0
{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/top_level.txt +0 -0

xax/__init__.py CHANGED Viewed

@@ -12,7 +12,7 @@ and running the update script:
     python -m scripts.update_api --inplace
 """
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 # This list shouldn't be modified by hand; instead, run the update script.
 __all__ = [
@@ -44,6 +44,7 @@ __all__ = [
     "euler_to_quat",
     "get_projected_gravity_vector_from_quat",
     "quat_to_euler",
+    "quat_to_rotmat",
     "rotate_vector_by_quat",
     "cross_entropy",
     "cast_norm_type",
@@ -206,6 +207,7 @@ NAME_MAP: dict[str, str] = {
     "euler_to_quat": "nn.geom",
     "get_projected_gravity_vector_from_quat": "nn.geom",
     "quat_to_euler": "nn.geom",
+    "quat_to_rotmat": "nn.geom",
     "rotate_vector_by_quat": "nn.geom",
     "cross_entropy": "nn.losses",
     "cast_norm_type": "nn.norm",
@@ -369,6 +371,7 @@ if IMPORT_ALL or TYPE_CHECKING:
         euler_to_quat,
         get_projected_gravity_vector_from_quat,
         quat_to_euler,
+        quat_to_rotmat,
         rotate_vector_by_quat,
     )
     from xax.nn.losses import cross_entropy

xax/nn/geom.py CHANGED Viewed

@@ -177,3 +177,37 @@ def cubic_bezier_interpolation(y_start: Array, y_end: Array, x: Array) -> Array:
     y_diff = y_end - y_start
     bezier = x**3 + 3 * (x**2 * (1 - x))
     return y_start + y_diff * bezier
+def quat_to_rotmat(quat: Array, eps: float = 1e-6) -> Array:
+    """Builds the rotation matrix described by a quaternion.
+    The quaternion is read in (w, x, y, z) order and is divided by its norm
+    plus ``eps`` before the matrix entries are computed.
+    Args:
+        quat: The quaternion to convert, shape (*, 4).
+        eps: Added to the norm so that an all-zero quaternion does not
+            divide by zero.
+    Returns:
+        The rotation matrix, shape (*, 3, 3).
+    """
+    unit = quat / (jnp.linalg.norm(quat, axis=-1, keepdims=True) + eps)
+    # Each component keeps a trailing singleton axis, shape (*, 1).
+    w, x, y, z = (unit[..., i : i + 1] for i in range(4))
+    rows = (
+        (1 - 2 * (y * y + z * z), 2 * (x * y - z * w), 2 * (x * z + y * w)),
+        (2 * (x * y + z * w), 1 - 2 * (x * x + z * z), 2 * (y * z - x * w)),
+        (2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)),
+    )
+    # Join the entries of each row along the last axis, then place the three
+    # rows on a new second-to-last axis.
+    return jnp.concatenate(
+        [jnp.concatenate(row, axis=-1)[..., None, :] for row in rows],
+        axis=-2,
+    )

xax/task/mixins/checkpointing.py CHANGED Viewed

@@ -63,10 +63,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     def get_init_ckpt_path(self) -> Path | None:
         if self._exp_dir is not None:
-            ckpt_path = self.get_ckpt_path()
-            if not ckpt_path.exists():
-                logger.warning("No checkpoint found in experiment directory: %s", ckpt_path)
-            else:
+            if (ckpt_path := self.get_ckpt_path()).exists():
                 return ckpt_path
         if self.config.load_from_ckpt_path is not None:
             ckpt_path = Path(self.config.load_from_ckpt_path)
@@ -86,7 +83,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
         return False
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -97,7 +94,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]: ...
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -106,7 +103,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> tuple[PyTree, State, Config]: ...
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -115,7 +112,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> PyTree: ...
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -124,7 +121,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> optax.GradientTransformation: ...
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -133,7 +130,7 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> optax.OptState: ...
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
@@ -141,14 +138,14 @@ class CheckpointingMixin(ArtifactsMixin[Config], Generic[Config]):
     ) -> State: ...
     @overload
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,
         part: Literal["config"],
     ) -> Config: ...
-    def load_checkpoint(
+    def load_ckpt_with_template(
         self,
         path: Path,
         *,

xax/task/mixins/train.py CHANGED Viewed

@@ -12,6 +12,7 @@ import time
 import traceback
 from abc import ABC, abstractmethod
 from dataclasses import asdict, dataclass, is_dataclass
+from pathlib import Path
 from threading import Thread
 from typing import (
     Any,
@@ -39,7 +40,7 @@ from xax.core.state import Phase, State
 from xax.nn.functions import set_random_seed
 from xax.nn.parallel import is_master
 from xax.task.mixins.artifacts import ArtifactsConfig, ArtifactsMixin
-from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin
+from xax.task.mixins.checkpointing import CheckpointingConfig, CheckpointingMixin, CheckpointPart
 from xax.task.mixins.data_loader import DataloadersConfig, DataloadersMixin
 from xax.task.mixins.logger import LoggerConfig, LoggerMixin
 from xax.task.mixins.runnable import RunnableConfig, RunnableMixin
@@ -54,7 +55,7 @@ from xax.utils.experiments import (
     get_training_code,
 )
 from xax.utils.jax import jit as xax_jit
-from xax.utils.logging import LOG_STATUS
+from xax.utils.logging import LOG_PING, LOG_STATUS
 from xax.utils.text import highlight_exception_message, show_info
 from xax.utils.types.frozen_dict import FrozenDict
@@ -340,12 +341,7 @@ class TrainMixin(
         if init_ckpt_path is not None:
             logger.info("Loading checkpoint from %s", init_ckpt_path)
-            model_spec = eqx.filter_eval_shape(self.get_model, key)
-            model, state, config = self.load_checkpoint(
-                init_ckpt_path,
-                part="model_state_config",
-                model_template=model_spec,
-            )
+            model, state, config = self.load_ckpt(init_ckpt_path, part="model_state_config")
             config_diff = get_diff_string(diff_configs(asdict(config), asdict(self.config)))
             if config_diff:
                 logger.warning("Loaded config differs from current config:\n%s", config_diff)
@@ -353,17 +349,11 @@ class TrainMixin(
             if not load_optimizer:
                 return model, state
-            # Loads the optimizer.
-            optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
-            optimizer = self.load_checkpoint(init_ckpt_path, part="opt", optimizer_template=optimizer_spec)
-            # Loads the optimizer state.
-            opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
-            opt_state = self.load_checkpoint(init_ckpt_path, part="opt_state", opt_state_template=opt_state_spec)
+            optimizer = self.load_ckpt(init_ckpt_path, part="opt")
+            opt_state = self.load_ckpt(init_ckpt_path, part="opt_state", model=model, optimizer=optimizer)
             return model, optimizer, opt_state, state
-        logger.info("No checkpoint found. Initializing a new model.")
+        logger.info("Starting a new training run")
         model = self.get_model(key)
         state = State.init_state()
@@ -375,6 +365,131 @@ class TrainMixin(
         return model, optimizer, opt_state, state
+    @overload
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: Literal["all"] = "all",
+    ) -> tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]: ...
+    @overload
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: Literal["model_state_config"],
+    ) -> tuple[PyTree, State, Config]: ...
+    @overload
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: Literal["model"],
+    ) -> PyTree: ...
+    @overload
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: Literal["opt"],
+    ) -> optax.GradientTransformation: ...
+    @overload
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: Literal["opt_state"],
+        model: PyTree | None = None,
+        optimizer: optax.GradientTransformation | None = None,
+    ) -> optax.OptState: ...
+    @overload
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: Literal["state"],
+    ) -> State: ...
+    @overload
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: Literal["config"],
+    ) -> Config: ...
+    def load_ckpt(
+        self,
+        path: str | Path,
+        *,
+        part: CheckpointPart = "all",
+        model: PyTree | None = None,
+        optimizer: optax.GradientTransformation | None = None,
+    ) -> (
+        tuple[PyTree, optax.GradientTransformation, optax.OptState, State, Config]
+        | tuple[PyTree, State, Config]
+        | PyTree
+        | optax.GradientTransformation
+        | optax.OptState
+        | State
+        | Config
+    ):
+        """Loads part of a checkpoint, building the required templates itself.
+        Wraps ``load_ckpt_with_template``: the shape templates it needs are
+        derived with ``eqx.filter_eval_shape`` from ``get_model``,
+        ``get_optimizer`` and ``get_initial_opt_state``.
+        Args:
+            path: Location of the checkpoint; converted to a ``Path``.
+            part: Which part of the checkpoint to load.
+            model: Only read when ``part="opt_state"``; used to build the
+                optimizer-state template. Loaded from the checkpoint if None.
+            optimizer: Only read when ``part="opt_state"``; used to build the
+                optimizer-state template. Loaded from the checkpoint if None.
+        Returns:
+            The requested part, or a tuple of parts for ``"all"`` and
+            ``"model_state_config"``.
+        Raises:
+            ValueError: If ``part`` is not a known checkpoint part.
+        """
+        path = Path(path)
+        # This key isn't used for anything, it's just a required argument.
+        key = jax.random.PRNGKey(0)
+        match part:
+            case "model_state_config":
+                model_spec = eqx.filter_eval_shape(self.get_model, key)
+                return self.load_ckpt_with_template(path, part="model_state_config", model_template=model_spec)
+            case "model":
+                model_spec = eqx.filter_eval_shape(self.get_model, key)
+                return self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+            case "opt":
+                optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
+                return self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+            case "opt_state":
+                # The optimizer-state template depends on both the model and
+                # the optimizer, so load whichever the caller did not supply.
+                if model is None:
+                    model_spec = eqx.filter_eval_shape(self.get_model, key)
+                    model = self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+                if optimizer is None:
+                    optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
+                    optimizer = self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
+                return self.load_ckpt_with_template(path, part="opt_state", opt_state_template=opt_state_spec)
+            case "state":
+                return self.load_ckpt_with_template(path, part="state")
+            case "config":
+                return self.load_ckpt_with_template(path, part="config")
+            case "all":
+                model_spec = eqx.filter_eval_shape(self.get_model, key)
+                model = self.load_ckpt_with_template(path, part="model", model_template=model_spec)
+                optimizer_spec = eqx.filter_eval_shape(self.get_optimizer)
+                optimizer = self.load_ckpt_with_template(path, part="opt", optimizer_template=optimizer_spec)
+                opt_state_spec = eqx.filter_eval_shape(self.get_initial_opt_state, model, optimizer)
+                opt_state = self.load_ckpt_with_template(path, part="opt_state", opt_state_template=opt_state_spec)
+                state = self.load_ckpt_with_template(path, part="state")
+                config = self.load_ckpt_with_template(path, part="config")
+                return model, optimizer, opt_state, state, config
+            case _:
+                raise ValueError(f"Unknown checkpoint part: {part}")
     def get_output(self, model: PyTree, batch: Batch, state: State) -> Output:
         """Gets the output from the model.
@@ -529,8 +644,7 @@ class TrainMixin(
         self._last_printed_remaining_time = state.elapsed_time_s
         remaining_seconds = remaining_percent * state.elapsed_time_s / (1 - remaining_percent)
         termination_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time() + remaining_seconds))
-        # logger.info("Estimated finish time: %s", termination_time)
-        jax.debug.print("Estimated finish time: {}", termination_time)
+        logger.log(LOG_PING, "Estimated finish time: %s", termination_time)
     def get_remaining_percent(self, state: State) -> float | None:
         if self.config.max_steps is None:

{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.2.0
+Version: 0.2.1
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte

{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-xax/__init__.py,sha256=CO9UZlYsYsDL2B6z-Id0Fv0ZSD5uwUZ3eZ6zwwqtJhU,14103
+xax/__init__.py,sha256=kd-88OQGnuHb91PXwroAfLb0bMfbe37fXqpECRrjhoU,14182
 xax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/requirements-dev.txt,sha256=qkscNkFzWd1S5fump-AKH53rR65v2x5FmboFdy_kKvs,128
 xax/requirements.txt,sha256=6qY-84e-sTmlfJNrSjwONQKqzAn5h8G_oGIhnhmfSr4,302
@@ -10,7 +10,7 @@ xax/nn/embeddings.py,sha256=bQGxBFxkLwi2MQLkRfGaHPH5P_KKB21HdI7VNWTKIOQ,11847
 xax/nn/equinox.py,sha256=5fdOKRXqAVZPsV-aEez3i1wamr_oBYnG74GP1jEthjM,4843
 xax/nn/export.py,sha256=7Yemw3T33QGEP8RkmTkpu6tRVOhut2RUJmttNFfCgFw,5537
 xax/nn/functions.py,sha256=CI_OmspaQwN9nl4hwefIU3_I7m6gBZwJ9aGK1JGUgr0,2713
-xax/nn/geom.py,sha256=PN0Ndn575aVtsSfxi67RghHB7luRkqtpS7bPbT1LpLE,5201
+xax/nn/geom.py,sha256=rImNlkHWeoNcY7f84nknizJ6uzsrMhbAtKeb2xAWxNY,6215
 xax/nn/losses.py,sha256=Q_NVnm5n4UPBvp5nI_1aUptfXnqFYoUeFwySiyvopHg,272
 xax/nn/norm.py,sha256=WgZ3QCrUnf-YecwhEtVPcr99fKK3ECl_UeiAs2uv7oo,564
 xax/nn/parallel.py,sha256=fnTiT7MsG7eQrJvqwjIz2Ifo3P27TuxIJzmpGYSa_dQ,4608
@@ -32,7 +32,7 @@ xax/task/loggers/stdout.py,sha256=oeIgPkj4RyJgBuWaJK9ncLa65iBNJCWXhSF8fx3_54c,65
 xax/task/loggers/tensorboard.py,sha256=KOL9l60tLctX-VAdNwe49H48SAJeGxph3sflJpojA-4,8337
 xax/task/mixins/__init__.py,sha256=D3oU31rB9FeOr9MPLleLt5JFbftUr4sBTwgnwQdc2qA,809
 xax/task/mixins/artifacts.py,sha256=2ezmZGzPGe3nhsd9KRkeHWWXdbT9m7drzimIfw6v1XY,2892
-xax/task/mixins/checkpointing.py,sha256=JHBOdcgmJvhyXldPF5pHRmyPUN9SHcxxngsC1ap4b1E,11468
+xax/task/mixins/checkpointing.py,sha256=2nJgqFcV-D8W-4j8TR3PvVh1g5hQUOo-_quKO-XlE4U,11398
 xax/task/mixins/compile.py,sha256=PG5aF3W9v_xGiImHgUJ7gmwuQQoSQWufdpl2N_mlLX0,3922
 xax/task/mixins/cpu_stats.py,sha256=vAjEc3HpPnl56m7vshYX0dXAHJrB98DzVdsYSRqQllc,9371
 xax/task/mixins/data_loader.py,sha256=Tp7zqPdfH2_JuE6J6EP-fEtCQpq9MjKlGHYK7Zh-goU,6599
@@ -41,7 +41,7 @@ xax/task/mixins/logger.py,sha256=6oXsJJyNUx6YT3q58FVXMZBUpMgjVkGre6BXFN20cVI,280
 xax/task/mixins/process.py,sha256=d1opVgvc6bOFXb7R58b07F4P5lbSZIzYaajtE0eBbpw,1477
 xax/task/mixins/runnable.py,sha256=IYIsLd2k09g-_y6o44EhJqT7E6BpsyEMmsyLSuzqjtc,1979
 xax/task/mixins/step_wrapper.py,sha256=-Yu5Nft2CRw1JvZt6J_94SM1vqX8fk08IDK95Pmd2ew,1648
-xax/task/mixins/train.py,sha256=t8Qyw40ahuJW0SPVgFLljqYbbSc1M_WLop87iwYE41Q,27064
+xax/task/mixins/train.py,sha256=v9oi9tNsNBYo-Ne_98nCG9qHX6sxvymHjsRDnL6GL-U,30871
 xax/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/utils/debugging.py,sha256=OtUdu-3tQsQtik0Q9UM-SNV46IbPjwrAfZcywzoB5d4,1940
 xax/utils/experiments.py,sha256=Hzl46_9IH5_9cKzxit-FyVUWBH-_lBs00ZciuIdnWO8,29811
@@ -58,8 +58,8 @@ xax/utils/data/collate.py,sha256=Rd9vMomr_S_zCa_Hi4dO-8ntzAfVwndIUtuXFA3iNcc,706
 xax/utils/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/utils/types/frozen_dict.py,sha256=ZCMGfSfr2_b2qZbq9ywPD0zej5tpVSId2JftXpwfB5k,4686
 xax/utils/types/hashable_array.py,sha256=l5iIcFmkYzfGeaZmcSoeFkthFASqM8xJYK3AXhZQYwc,992
-xax-0.2.0.dist-info/licenses/LICENSE,sha256=HCN2bImAzUOXldAZZI7JZ9PYq6OwMlDAP_PpX1HnuN0,1071
-xax-0.2.0.dist-info/METADATA,sha256=FyMDy4yB_KQF_IdCMMe_10VWpIEE5g6qEIZuXx-pLgU,1882
-xax-0.2.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-xax-0.2.0.dist-info/top_level.txt,sha256=g4Au_r2XhvZ-lTybviH-Fh9g0zF4DAYHYxPue1-xbs8,4
-xax-0.2.0.dist-info/RECORD,,
+xax-0.2.1.dist-info/licenses/LICENSE,sha256=HCN2bImAzUOXldAZZI7JZ9PYq6OwMlDAP_PpX1HnuN0,1071
+xax-0.2.1.dist-info/METADATA,sha256=2pOZLKMIcLoQTM-tRqRvVkF57PZyMoALM87UI5B4dtk,1882
+xax-0.2.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+xax-0.2.1.dist-info/top_level.txt,sha256=g4Au_r2XhvZ-lTybviH-Fh9g0zF4DAYHYxPue1-xbs8,4
+xax-0.2.1.dist-info/RECORD,,

{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{xax-0.2.0.dist-info → xax-0.2.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

xax 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

xax 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl