PyPI - xax - Versions diffs - 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl - Mend

xax 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

xax/__init__.py +10 -2
xax/nn/distributions.py +51 -53
xax/task/mixins/train.py +0 -1
xax/utils/pytree.py +74 -10
{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/METADATA +1 -1
{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/RECORD +10 -10
{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/WHEEL +0 -0
{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/entry_points.txt +0 -0
{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/licenses/LICENSE +0 -0
{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/top_level.txt +0 -0

xax/__init__.py CHANGED Viewed

@@ -12,7 +12,7 @@ and running the update script:
     python -m scripts.update_api --inplace
 """
-__version__ = "0.3.10"
+__version__ = "0.3.12"
 # This list shouldn't be modified by hand; instead, run the update script.
 __all__ = [
@@ -138,6 +138,7 @@ __all__ = [
     "worker_chunk",
     "profile",
     "compute_nan_ratio",
+    "diff_pytree",
     "flatten_array",
     "flatten_pytree",
     "get_pytree_mapping",
@@ -330,6 +331,7 @@ NAME_MAP: dict[str, str] = {
     "worker_chunk": "utils.numpy",
     "profile": "utils.profile",
     "compute_nan_ratio": "utils.pytree",
+    "diff_pytree": "utils.pytree",
     "flatten_array": "utils.pytree",
     "flatten_pytree": "utils.pytree",
     "get_pytree_mapping": "utils.pytree",
@@ -413,7 +415,12 @@ if IMPORT_ALL or TYPE_CHECKING:
         TransformerCache,
         TransformerStack,
     )
-    from xax.nn.distributions import Categorical, Distribution, MixtureOfGaussians, Normal
+    from xax.nn.distributions import (
+        Categorical,
+        Distribution,
+        MixtureOfGaussians,
+        Normal,
+    )
     from xax.nn.embeddings import (
         EmbeddingKind,
         FourierEmbeddings,
@@ -518,6 +525,7 @@ if IMPORT_ALL or TYPE_CHECKING:
     from xax.utils.profile import profile
     from xax.utils.pytree import (
         compute_nan_ratio,
+        diff_pytree,
         flatten_array,
         flatten_pytree,
         get_pytree_mapping,

xax/nn/distributions.py CHANGED Viewed

@@ -18,6 +18,9 @@ import jax
 import jax.numpy as jnp
 from jaxtyping import Array, PRNGKeyArray
+STD_CLIP = 1e-6
+LOGIT_CLIP = 6.0
 class Distribution(ABC):
     @abstractmethod
@@ -34,87 +37,91 @@ class Distribution(ABC):
 class Categorical(Distribution):
-    def __init__(self, logits_n: Array) -> None:
-        self.logits_n = logits_n
+    def __init__(self, logits_nc: Array, logit_clip: float = LOGIT_CLIP) -> None:
+        """Initialize a categorical distribution.
+        Args:
+            logits_nc: Array of shape (..., n_categories) containing logits
+            logit_clip: Clipping value for logits
+        """
+        self.logits_nc = jnp.clip(logits_nc, -logit_clip, logit_clip)
     @property
     def num_categories(self) -> int:
-        return self.logits_n.shape[-1]
+        return self.logits_nc.shape[-1]
-    def log_prob(self, x: Array) -> Array:
-        """Compute log probability for specific categories.
-        Args:
-            x: Array of category indices
-        Returns:
-            Log probabilities for the given categories
-        """
-        log_probs = jax.nn.log_softmax(self.logits_n, axis=-1)
-        # Use advanced indexing to get the log probabilities for the given categories
-        return log_probs[x]
+    def log_prob(self, x_n: Array) -> Array:
+        log_probs_n = jax.nn.log_softmax(self.logits_nc, axis=-1)
+        return log_probs_n[x_n]
     def sample(self, key: PRNGKeyArray) -> Array:
-        return jax.random.categorical(key, self.logits_n, axis=-1)
+        return jax.random.categorical(key, self.logits_nc, axis=-1)
     def mode(self) -> Array:
-        return self.logits_n.argmax(axis=-1)
+        return self.logits_nc.argmax(axis=-1)
     def entropy(self) -> Array:
-        """Compute entropy of the categorical distribution."""
-        probs = jax.nn.softmax(self.logits_n, axis=-1)
-        log_probs = jax.nn.log_softmax(self.logits_n, axis=-1)
+        probs = jax.nn.softmax(self.logits_nc, axis=-1)
+        log_probs = jax.nn.log_softmax(self.logits_nc, axis=-1)
         return -jnp.sum(probs * log_probs, axis=-1)
 class Normal(Distribution):
-    def __init__(self, loc: Array, scale: Array) -> None:
-        self.loc = loc
-        self.scale = scale
+    def __init__(self, loc_n: Array, scale_n: Array, std_clip: float = STD_CLIP) -> None:
+        """Initialize a normal distribution.
+        Args:
+            loc_n: Mean of the distribution
+            scale_n: Standard deviation of the distribution
+            std_clip: Minimum standard deviation
+        """
+        self.loc_n = loc_n
+        self.scale_n = jnp.clip(scale_n, min=std_clip)
     def log_prob(self, x: Array) -> Array:
-        return -0.5 * jnp.log(2 * jnp.pi) - jnp.log(self.scale) - (x - self.loc) ** 2 / (2 * self.scale**2)
+        return -0.5 * jnp.log(2 * jnp.pi) - jnp.log(self.scale_n) - (x - self.loc_n) ** 2 / (2 * self.scale_n**2)
     def sample(self, key: PRNGKeyArray) -> Array:
-        return self.loc + self.scale * jax.random.normal(key, self.loc.shape)
+        return self.loc_n + self.scale_n * jax.random.normal(key, self.loc_n.shape)
     def mode(self) -> Array:
-        return self.loc
+        return self.loc_n
     def entropy(self) -> Array:
-        return jnp.log(2 * jnp.pi * jnp.e) + jnp.log(self.scale)
+        return jnp.log(2 * jnp.pi * jnp.e) + jnp.log(self.scale_n)
 class MixtureOfGaussians(Distribution):
-    def __init__(self, means_nm: Array, stds_nm: Array, logits_nm: Array) -> None:
+    def __init__(
+        self,
+        means_nm: Array,
+        stds_nm: Array,
+        logits_nm: Array,
+        std_clip: float = STD_CLIP,
+        logit_clip: float = LOGIT_CLIP,
+    ) -> None:
         """Initialize a mixture of Gaussians.
         Args:
             means_nm: Array of shape (..., n_components) containing means
             stds_nm: Array of shape (..., n_components) containing standard deviations
             logits_nm: Array of shape (..., n_components) containing mixing logits
+            std_clip: Minimum standard deviation
+            logit_clip: Clipping value for logits
         """
         self.means_nm = means_nm
-        self.stds_nm = stds_nm
-        self.logits_nm = logits_nm
-    def log_prob(self, x: Array) -> Array:
-        """Compute log probability of the mixture.
+        self.stds_nm = jnp.clip(stds_nm, min=std_clip)
+        self.logits_nm = jnp.clip(logits_nm, -logit_clip, logit_clip)
-        Args:
-            x: Array of shape (...,) containing values to evaluate
-        Returns:
-            Log probabilities of shape (...,)
-        """
+    def log_prob(self, x_n: Array) -> Array:
         # Expand x to match component dimensions
-        x_expanded = x[..., None]  # Shape: (..., 1)
+        x_n_expanded = x_n[..., None]  # Shape: (..., 1)
         # Compute log probabilities for each component
         component_log_probs = (
             -0.5 * jnp.log(2 * jnp.pi)
             - jnp.log(self.stds_nm)
-            - (x_expanded - self.means_nm) ** 2 / (2 * self.stds_nm**2)
+            - (x_n_expanded - self.means_nm) ** 2 / (2 * self.stds_nm**2)
         )
         # Compute mixing weights
@@ -123,16 +130,7 @@ class MixtureOfGaussians(Distribution):
         # Combine using log-sum-exp trick for numerical stability
         return jax.scipy.special.logsumexp(component_log_probs + mixing_logits, axis=-1)
-    def sample(self, key: PRNGKeyArray) -> Array:
-        """Sample from the mixture of Gaussians.
-        Args:
-            key: PRNG key
-        Returns:
-            Samples of shape (...,) where ... are the batch dimensions
-        """
-        # Sample component indices
+    def sample(self, key: PRNGKeyArray) -> Array:  # Sample component indices
         component_key, sample_key = jax.random.split(key)
         component_indices = jax.random.categorical(component_key, self.logits_nm, axis=-1)
@@ -153,8 +151,8 @@ class MixtureOfGaussians(Distribution):
         noise = jax.random.normal(sample_key, selected_means.shape)
         # Reshape back to original batch shape
-        samples = selected_means + selected_stds * noise
-        return samples.reshape(batch_shape)
+        samples_n = selected_means + selected_stds * noise
+        return samples_n.reshape(batch_shape)
     def mode(self) -> Array:
         """Return the mode of the mixture (approximate - returns mean of highest weight component)."""

xax/task/mixins/train.py CHANGED Viewed

@@ -177,7 +177,6 @@ class TrainConfig(
     step_kind: str = field("step", help=f"How to measure a step; one of [{', '.join(get_args(StepKind))}]")
     updates_per_step: int = field(1, help="Number of updates to perform per step")
     random_seed: int = field(1337, help="Random seed for the task")
-    global_grad_clip: float = field(value=10.0, help="The maximum gradient norm to clip to.")
 Config = TypeVar("Config", bound=TrainConfig)

xax/utils/pytree.py CHANGED Viewed

@@ -1,12 +1,15 @@
 """Utils for accessing, modifying, and otherwise manipulating pytrees."""
+from dataclasses import fields, is_dataclass
 from typing import Mapping, Sequence, TypeVar
 import chex
 import equinox as eqx
 import jax
 import jax.numpy as jnp
+import numpy as np
 from jax import Array
+from jax.core import get_aval
 from jaxtyping import PRNGKeyArray, PyTree
 T = TypeVar("T")
@@ -258,18 +261,79 @@ def tuple_insert(t: tuple[T, ...], index: int, value: T) -> tuple[T, ...]:
 def get_pytree_mapping(pytree: PyTree) -> dict[str, Array]:
     leaves: dict[str, Array] = {}
-    def _get_str(thing: PyTree) -> str:
-        if isinstance(thing, str):
-            return thing
-        if isinstance(thing, Sequence):
-            return "/".join(_get_str(x) for x in thing)
-        if isinstance(thing, Mapping):
-            return "/".join(f"{_get_str(k)}:{_get_str(v)}" for k, v in thing.items())
-        return str(thing)
     def _get_leaf(path: tuple, x: PyTree) -> None:
         if isinstance(x, jnp.ndarray):
-            leaves[_get_str(path)] = x
+            leaves[jax.tree_util.keystr(path, simple=True, separator="/")] = x
     jax.tree.map_with_path(_get_leaf, pytree)
     return leaves
+def diff_pytree(tree_a: PyTree, tree_b: PyTree, prefix: str = "") -> list[str]:
+    diffs = []
+    # Handles dataclasses.
+    if is_dataclass(tree_a) and is_dataclass(tree_b):
+        for field in fields(tree_a):
+            attr_a, attr_b = getattr(tree_a, field.name), getattr(tree_b, field.name)
+            diffs.extend(diff_pytree(attr_a, attr_b, prefix + f"{field.name}."))
+        return diffs
+    # Handle dict-like objects
+    elif isinstance(tree_a, Mapping) and isinstance(tree_b, Mapping):
+        if type(tree_a) is not type(tree_b):
+            diffs.append(f"{prefix}: type {type(tree_a)} vs {type(tree_b)}")
+            return diffs
+        keys_a, keys_b = set(tree_a.keys()), set(tree_b.keys())
+        for k in keys_a - keys_b:
+            diffs.append(f"{prefix}{k}: present in A only")
+        for k in keys_b - keys_a:
+            diffs.append(f"{prefix}{k}: present in B only")
+        for k in keys_a & keys_b:
+            diffs.extend(diff_pytree(tree_a[k], tree_b[k], prefix + f"{k}."))
+        return diffs
+    # Handle tuple/list
+    elif isinstance(tree_a, Sequence) and isinstance(tree_b, Sequence):
+        if type(tree_a) is not type(tree_b):
+            diffs.append(f"{prefix}: type {type(tree_a)} vs {type(tree_b)}")
+            return diffs
+        if len(tree_a) != len(tree_b):
+            diffs.append(f"{prefix}: different lengths {len(tree_a)} vs {len(tree_b)}")
+        for i, (a_i, b_i) in enumerate(zip(tree_a, tree_b, strict=True)):
+            diffs.extend(diff_pytree(a_i, b_i, prefix + f"[{i}]."))
+        return diffs
+    # Handles basic types.
+    elif isinstance(tree_a, (int, float, bool, str, type(None), np.number, np.bool, bytes)):
+        if tree_a != tree_b:
+            diffs.append(f"{prefix}: {tree_a!r} vs {tree_b!r}")
+        return diffs
+    # Handles Numpy arrays.
+    elif isinstance(tree_a, np.ndarray) and isinstance(tree_b, np.ndarray):
+        if tree_a.shape != tree_b.shape:
+            diffs.append(f"{prefix}: shape {tree_a.shape} vs {tree_b.shape}")
+        if tree_a.dtype != tree_b.dtype:
+            diffs.append(f"{prefix}: dtype {tree_a.dtype} vs {tree_b.dtype}")
+        return diffs
+    # Handle arrays (check shape/dtype)
+    elif isinstance(tree_a, jnp.ndarray) and isinstance(tree_b, jnp.ndarray):
+        if tree_a.shape != tree_b.shape:
+            diffs.append(f"{prefix}: shape {tree_a.shape} vs {tree_b.shape}")
+        if tree_a.dtype != tree_b.dtype:
+            diffs.append(f"{prefix}: dtype {tree_a.dtype} vs {tree_b.dtype}")
+        aval_a = get_aval(tree_a)
+        aval_b = get_aval(tree_b)
+        if aval_a != aval_b:  # pyright: ignore[reportAttributeAccessIssue]
+            diffs.append(f"{prefix}: aval {aval_a} vs {aval_b}")
+        return diffs
+    # Handle mismatched types
+    elif type(tree_a) is not type(tree_b):
+        diffs.append(f"{prefix}: type {type(tree_a)} vs {type(tree_b)}")
+        return diffs
+    else:
+        raise ValueError(f"Unknown type: {type(tree_a)}")

{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.3.10
+Version: 0.3.12
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte

{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-xax/__init__.py,sha256=lSwyrPTof_BZ-pyPNhNICJnCZMN9i2sJ-Ii3S_vY_28,16666
+xax/__init__.py,sha256=HXD6tR7Bz1b5ImFyRyR1kAok-dx5g8eBDpO_lCIP8rk,16782
 xax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/requirements-dev.txt,sha256=qkscNkFzWd1S5fump-AKH53rR65v2x5FmboFdy_kKvs,128
 xax/requirements.txt,sha256=6qY-84e-sTmlfJNrSjwONQKqzAn5h8G_oGIhnhmfSr4,302
@@ -9,7 +9,7 @@ xax/core/conf.py,sha256=d7Dp_GwKnaxtkztlSrJSM_LR0UYJX_FWTtceIWCBkxc,5138
 xax/core/state.py,sha256=_gtINsRc310Bu_HuIYsDoOKTZa6DgU2tz0IOKkdnY9Q,3813
 xax/nn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/nn/attention.py,sha256=m6yEoRqf7-wLgrEltaR6CxF_Cody0MaNtAkuKk39qJI,31176
-xax/nn/distributions.py,sha256=096IDvoJ0ZA4SqcfgNSmrICsGcsKVcTAh0Vl6SwN3-o,6343
+xax/nn/distributions.py,sha256=6YOjyiPOC7XLDaMYpFNBlLCu3eLgDAeqIg9FoKfYLL4,6497
 xax/nn/embeddings.py,sha256=8tAuAPdkVj-U5IwtRZKHA0WYMFRbpCuwyAxcChdKhbE,11784
 xax/nn/functions.py,sha256=bA5kJYzMtFM8eUqBC086i355zJMAO7k_vPFNSDBI9-s,2814
 xax/nn/geom.py,sha256=c9K52vLm-V-15CRqMNx0OmqsWfb3PHQxXW4OSx9kCAk,10635
@@ -43,7 +43,7 @@ xax/task/mixins/logger.py,sha256=6oXsJJyNUx6YT3q58FVXMZBUpMgjVkGre6BXFN20cVI,280
 xax/task/mixins/process.py,sha256=hqDEsMp_SL6ee97iq26-G0g49OcWZZaX82JD4F22eJU,1781
 xax/task/mixins/runnable.py,sha256=pcLrYc_TycZUY9zZim05Skc2FWk3IZKFnu6p3UDMonM,1966
 xax/task/mixins/step_wrapper.py,sha256=-Yu5Nft2CRw1JvZt6J_94SM1vqX8fk08IDK95Pmd2ew,1648
-xax/task/mixins/train.py,sha256=_kDpifLi1arSuT0ssFhBV0axpvLlQG3a97pohya0Eqc,32908
+xax/task/mixins/train.py,sha256=hwAR_G1kgvhXgrE5ZRNL4Jn-Teflx65_1bdk6aULXEg,32814
 xax/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/utils/debugging.py,sha256=OtUdu-3tQsQtik0Q9UM-SNV46IbPjwrAfZcywzoB5d4,1940
 xax/utils/experiments.py,sha256=5k5hPYSaVjzoR_nm2Q3DAHMMYi3Bcp3N3PAQbwZq7Gg,29830
@@ -52,7 +52,7 @@ xax/utils/jaxpr.py,sha256=H7pWl48ROXIB1-ZPWYfOn-ou3EBMxYWIwc_A0reJQoo,2333
 xax/utils/logging.py,sha256=Kkyma_LJXqrN2HTQ214gRP_9ih3_bKk115MWC60lQWM,6656
 xax/utils/numpy.py,sha256=_jOXVi-d2AtJnRftPkRK5MDMzsU8slgw-Jjv4GRm6ns,1197
 xax/utils/profile.py,sha256=-aFdWpgYFvBsBZXSLL4zXrFe3zzsDqzmx4q5f2WOtpQ,1628
-xax/utils/pytree.py,sha256=w8Ab2LmJdQ8e1FxKF0xWWaOak09Mhu44ZcOeUR6uGFA,9889
+xax/utils/pytree.py,sha256=e8T5DY0ZhPcbvS3EuOsac0Oprra46lN05WEIhVN-3V0,12670
 xax/utils/tensorboard.py,sha256=P0oIFvX2Qts1H4lkpizhRIpQdD0MNppVMeut0Z94yCs,19878
 xax/utils/text.py,sha256=xS02aSzdywl3KIaNSpKWcxdd37oYlUJtu9wIjkc1wVc,10654
 xax/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -60,9 +60,9 @@ xax/utils/data/collate.py,sha256=Rd9vMomr_S_zCa_Hi4dO-8ntzAfVwndIUtuXFA3iNcc,706
 xax/utils/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/utils/types/frozen_dict.py,sha256=ebtHENhyUzSjyJTlbMaLtcckQIJ7EtgJiok_40TJZpo,4689
 xax/utils/types/hashable_array.py,sha256=l5iIcFmkYzfGeaZmcSoeFkthFASqM8xJYK3AXhZQYwc,992
-xax-0.3.10.dist-info/licenses/LICENSE,sha256=HCN2bImAzUOXldAZZI7JZ9PYq6OwMlDAP_PpX1HnuN0,1071
-xax-0.3.10.dist-info/METADATA,sha256=oQMGYjsfYxMmw0A60qE15yda_G-0YG5RNl17tboR1f0,1247
-xax-0.3.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-xax-0.3.10.dist-info/entry_points.txt,sha256=uRC6rx5ce0bf-FblJaZSBMxxKFfMyoWTf8OWbBmLSe8,61
-xax-0.3.10.dist-info/top_level.txt,sha256=g4Au_r2XhvZ-lTybviH-Fh9g0zF4DAYHYxPue1-xbs8,4
-xax-0.3.10.dist-info/RECORD,,
+xax-0.3.12.dist-info/licenses/LICENSE,sha256=HCN2bImAzUOXldAZZI7JZ9PYq6OwMlDAP_PpX1HnuN0,1071
+xax-0.3.12.dist-info/METADATA,sha256=RACxHJ_iF4r0BTTTgyTI1ExYF_-aXRWrsq3NlQC7l9A,1247
+xax-0.3.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+xax-0.3.12.dist-info/entry_points.txt,sha256=uRC6rx5ce0bf-FblJaZSBMxxKFfMyoWTf8OWbBmLSe8,61
+xax-0.3.12.dist-info/top_level.txt,sha256=g4Au_r2XhvZ-lTybviH-Fh9g0zF4DAYHYxPue1-xbs8,4
+xax-0.3.12.dist-info/RECORD,,

{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/WHEEL RENAMED Viewed

File without changes

{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{xax-0.3.10.dist-info → xax-0.3.12.dist-info}/top_level.txt RENAMED Viewed

File without changes

xax 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

xax 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl