PyPI - xax - Versions diffs - 0.2.21__tar.gz → 0.2.22__tar.gz - Mend

xax 0.2.21.tar.gz → 0.2.22.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73)

{xax-0.2.21/xax.egg-info → xax-0.2.22}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.2.21
+Version: 0.2.22
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte

{xax-0.2.21 → xax-0.2.22}/xax/__init__.py RENAMED Viewed

@@ -12,7 +12,7 @@ and running the update script:
     python -m scripts.update_api --inplace
 """
-__version__ = "0.2.21"
+__version__ = "0.2.22"
 # This list shouldn't be modified by hand; instead, run the update script.
 __all__ = [
@@ -112,8 +112,10 @@ __all__ = [
     "save_config",
     "stage_environment",
     "to_markdown_table",
+    "grad",
     "jit",
     "scan",
+    "vmap",
     "save_jaxpr_dot",
     "ColoredFormatter",
     "configure_logging",
@@ -287,8 +289,10 @@ NAME_MAP: dict[str, str] = {
     "save_config": "utils.experiments",
     "stage_environment": "utils.experiments",
     "to_markdown_table": "utils.experiments",
+    "grad": "utils.jax",
     "jit": "utils.jax",
     "scan": "utils.jax",
+    "vmap": "utils.jax",
     "save_jaxpr_dot": "utils.jaxpr",
     "ColoredFormatter": "utils.logging",
     "configure_logging": "utils.logging",
@@ -460,7 +464,7 @@ if IMPORT_ALL or TYPE_CHECKING:
         stage_environment,
         to_markdown_table,
     )
-    from xax.utils.jax import jit, scan
+    from xax.utils.jax import grad, jit, scan, vmap
     from xax.utils.jaxpr import save_jaxpr_dot
     from xax.utils.logging import (
         LOG_ERROR_SUMMARY,

{xax-0.2.21 → xax-0.2.22}/xax/task/mixins/train.py RENAMED Viewed

@@ -625,9 +625,13 @@ class TrainMixin(
         grad_metrics = {"grad_norm": grad_norm}
         def apply(grads: PyTree, grad_norm: Array) -> tuple[PyTree, optax.OptState]:
-            # Clip the global gradient norm to some desired range.
-            grad_factor = self.config.global_grad_clip / jnp.maximum(grad_norm, 1e-6)
-            grads = jax.tree.map(lambda x: x * grad_factor, grads)
+            # Clip gradients based on global norm, similar to optax.clip_by_global_norm
+            trigger = jnp.squeeze(grad_norm < self.config.global_grad_clip)
+            def clip_fn(t: Array) -> Array:
+                return jax.lax.select(trigger, t, (t / grad_norm.astype(t.dtype)) * self.config.global_grad_clip)
+            grads = jax.tree.map(clip_fn, grads)
             # Apply the gradient updates.
             updates, new_opt_state = optimizer.update(grads, opt_state, model_arr)

{xax-0.2.21 → xax-0.2.22}/xax/utils/jax.py RENAMED Viewed

@@ -6,13 +6,14 @@ import logging
 import os
 import time
 from functools import wraps
-from typing import Any, Callable, Iterable, ParamSpec, Sequence, TypeVar, cast
+from typing import Any, Callable, Hashable, Iterable, ParamSpec, Sequence, TypeVar, cast
 import jax
 import jax.numpy as jnp
 import numpy as np
 from jax._src import sharding_impls
 from jax._src.lib import xla_client as xc
+from jaxtyping import PyTree
 logger = logging.getLogger(__name__)
@@ -20,6 +21,7 @@ DEFAULT_COMPILE_TIMEOUT = 1.0
 Number = int | float | np.ndarray | jnp.ndarray
+T = TypeVar("T", bound=PyTree)
 P = ParamSpec("P")  # For function parameters
 R = TypeVar("R")  # For function return type
@@ -29,6 +31,9 @@ Carry = TypeVar("Carry")
 X = TypeVar("X")
 Y = TypeVar("Y")
+F = TypeVar("F", bound=Callable)
+AxisName = Hashable
 @functools.lru_cache(maxsize=None)
 def disable_jit_level() -> int:
@@ -166,6 +171,22 @@ def jit(
     return decorator
+def _split_module(tree: T, axis: int = 0) -> list[T]:
+    """Splits a module in the same way that jax.lax.scan and jax.vmap do.
+    Args:
+        tree: The tree to split.
+        axis: The axis to split on.
+    Returns:
+        A list of the split trees.
+    """
+    first_leaf = jax.tree.leaves(tree)[0]
+    num_slices = first_leaf.shape[axis]
+    result = [jax.tree.map(lambda x, idx=i: jnp.take(x, idx, axis=axis), tree) for i in range(num_slices)]
+    return result
 def scan(
     f: Callable[[Carry, X], tuple[Carry, Y]],
     init: Carry,
@@ -195,15 +216,96 @@ def scan(
     if not should_disable_jit(jit_level):
         return jax.lax.scan(f, init, xs, length, reverse, unroll)
+    carry = init
+    ys = []
     if xs is None:
         if length is None:
             raise ValueError("length must be provided if xs is None")
-        xs = cast(X, [None] * length)
+        for _ in range(length) if not reverse else range(length - 1, -1, -1):
+            carry, y = f(carry, None)  # type: ignore[arg-type]
+            ys.append(y)
-    carry = init
-    ys = []
-    for x in cast(Iterable, xs):
-        carry, y = f(carry, x)
-        ys.append(y)
+    else:
+        xlist = _split_module(xs, axis=0)
+        if reverse:
+            xlist = xlist[::-1]
+        for x in xlist:
+            carry, y = f(carry, x)
+            ys.append(y)
+    if reverse:
+        ys = ys[::-1]
+    if not ys:
+        return carry, jnp.array([])  # type: ignore[return-value]
     return carry, jax.tree.map(lambda *ys: jnp.stack(ys), *ys)
+def vmap(
+    fun: Callable[P, R],
+    in_axes: int | Sequence[int | None] = 0,
+    jit_level: int | None = None,
+) -> Callable[P, R]:
+    """A wrapper around jax.lax.vmap that allows for more flexible tracing.
+    If the provided JIT level is below the environment JIT level, we manually
+    unroll the scan function as a for loop.
+    """
+    if not should_disable_jit(jit_level):
+        return jax.vmap(fun, in_axes=in_axes)
+    @functools.wraps(fun)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> R:
+        if kwargs:
+            raise ValueError("vmap does not support keyword arguments")
+        ia = in_axes
+        if isinstance(ia, int):
+            ia = [ia] * len(args)
+        elif len(ia) != len(args):
+            raise ValueError("in_axes must be the same length as args")
+        if not all(isinstance(a, int) or a is None for a in ia):
+            raise ValueError("in_axes must be a list of integers or None")
+        ns = next((len(_split_module(a, axis=i)) for i, a in zip(ia, args, strict=True) if i is not None), None)
+        if ns is None:
+            return fun(*args, **kwargs)
+        split_args = [[a] * ns if i is None else _split_module(a, axis=i) for i, a in zip(ia, args, strict=True)]
+        split_outputs = [fun(*sargs, **kwargs) for sargs in zip(*split_args, strict=True)]
+        if not split_outputs:
+            return jnp.array([])  # type: ignore[return-value]
+        return jax.tree.map(lambda *ys: jnp.stack(ys), *split_outputs)
+    return wrapped
+def grad(
+    fun: Callable[P, R],
+    argnums: int | Sequence[int] = 0,
+    has_aux: bool = False,
+    holomorphic: bool = False,
+    allow_int: bool = False,
+    reduce_axes: Sequence[AxisName] = (),
+    jit_level: int | None = None,
+) -> Callable:
+    """A wrapper around jax.grad that allows for more flexible tracing.
+    We don't do anything special here, we just manually evaluate the function
+    if the JIT level is below the environment JIT level.
+    """
+    if not should_disable_jit(jit_level):
+        return jax.grad(fun, argnums, has_aux, holomorphic, allow_int, reduce_axes)
+    @functools.wraps(fun)
+    def wrapped(*args: P.args, **kwargs: P.kwargs) -> Callable:
+        # Evaluate the function once, then just return the gradient.
+        fun(*args, **kwargs)
+        return jax.grad(fun, argnums, has_aux, holomorphic, allow_int, reduce_axes)(*args, **kwargs)
+    return wrapped

{xax-0.2.21 → xax-0.2.22/xax.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.2.21
+Version: 0.2.22
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte