PyPI - xax - Versions diffs - 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl - Mend

xax 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

xax/__init__.py CHANGED Viewed

@@ -12,7 +12,7 @@ and running the update script:
     python -m scripts.update_api --inplace
 """
-__version__ = "0.3.10"
+__version__ = "0.3.11"
 # This list shouldn't be modified by hand; instead, run the update script.
 __all__ = [

xax/nn/distributions.py CHANGED Viewed

@@ -12,12 +12,16 @@ __all__ = [
     "MixtureOfGaussians",
 ]
+import math
 from abc import ABC, abstractmethod
 import jax
 import jax.numpy as jnp
 from jaxtyping import Array, PRNGKeyArray
+STD_CLIP = 1e-6
+LOGIT_CLIP = math.log(1e4)
 class Distribution(ABC):
     @abstractmethod
@@ -34,87 +38,91 @@ class Distribution(ABC):
 class Categorical(Distribution):
-    def __init__(self, logits_n: Array) -> None:
-        self.logits_n = logits_n
+    def __init__(self, logits_nc: Array, logit_clip: float = LOGIT_CLIP) -> None:
+        """Initialize a categorical distribution.
+        Args:
+            logits_nc: Array of shape (..., n_categories) containing logits
+            logit_clip: Clipping value for logits
+        """
+        self.logits_nc = jnp.clip(logits_nc, -logit_clip, logit_clip)
     @property
     def num_categories(self) -> int:
-        return self.logits_n.shape[-1]
+        return self.logits_nc.shape[-1]
-    def log_prob(self, x: Array) -> Array:
-        """Compute log probability for specific categories.
-        Args:
-            x: Array of category indices
-        Returns:
-            Log probabilities for the given categories
-        """
-        log_probs = jax.nn.log_softmax(self.logits_n, axis=-1)
-        # Use advanced indexing to get the log probabilities for the given categories
-        return log_probs[x]
+    def log_prob(self, x_n: Array) -> Array:
+        log_probs_n = jax.nn.log_softmax(self.logits_nc, axis=-1)
+        return log_probs_n[x_n]
     def sample(self, key: PRNGKeyArray) -> Array:
-        return jax.random.categorical(key, self.logits_n, axis=-1)
+        return jax.random.categorical(key, self.logits_nc, axis=-1)
     def mode(self) -> Array:
-        return self.logits_n.argmax(axis=-1)
+        return self.logits_nc.argmax(axis=-1)
     def entropy(self) -> Array:
-        """Compute entropy of the categorical distribution."""
-        probs = jax.nn.softmax(self.logits_n, axis=-1)
-        log_probs = jax.nn.log_softmax(self.logits_n, axis=-1)
+        probs = jax.nn.softmax(self.logits_nc, axis=-1)
+        log_probs = jax.nn.log_softmax(self.logits_nc, axis=-1)
         return -jnp.sum(probs * log_probs, axis=-1)
 class Normal(Distribution):
-    def __init__(self, loc: Array, scale: Array) -> None:
-        self.loc = loc
-        self.scale = scale
+    def __init__(self, loc_n: Array, scale_n: Array, std_clip: float = STD_CLIP) -> None:
+        """Initialize a normal distribution.
+        Args:
+            loc_n: Mean of the distribution
+            scale_n: Standard deviation of the distribution
+            std_clip: Minimum standard deviation
+        """
+        self.loc_n = loc_n
+        self.scale_n = jnp.clip(scale_n, min=std_clip)
     def log_prob(self, x: Array) -> Array:
-        return -0.5 * jnp.log(2 * jnp.pi) - jnp.log(self.scale) - (x - self.loc) ** 2 / (2 * self.scale**2)
+        return -0.5 * jnp.log(2 * jnp.pi) - jnp.log(self.scale_n) - (x - self.loc_n) ** 2 / (2 * self.scale_n**2)
     def sample(self, key: PRNGKeyArray) -> Array:
-        return self.loc + self.scale * jax.random.normal(key, self.loc.shape)
+        return self.loc_n + self.scale_n * jax.random.normal(key, self.loc_n.shape)
     def mode(self) -> Array:
-        return self.loc
+        return self.loc_n
     def entropy(self) -> Array:
-        return jnp.log(2 * jnp.pi * jnp.e) + jnp.log(self.scale)
+        return jnp.log(2 * jnp.pi * jnp.e) + jnp.log(self.scale_n)
 class MixtureOfGaussians(Distribution):
-    def __init__(self, means_nm: Array, stds_nm: Array, logits_nm: Array) -> None:
+    def __init__(
+        self,
+        means_nm: Array,
+        stds_nm: Array,
+        logits_nm: Array,
+        std_clip: float = STD_CLIP,
+        logit_clip: float = LOGIT_CLIP,
+    ) -> None:
         """Initialize a mixture of Gaussians.
         Args:
             means_nm: Array of shape (..., n_components) containing means
             stds_nm: Array of shape (..., n_components) containing standard deviations
             logits_nm: Array of shape (..., n_components) containing mixing logits
+            std_clip: Minimum standard deviation
+            logit_clip: Clipping value for logits
         """
         self.means_nm = means_nm
-        self.stds_nm = stds_nm
-        self.logits_nm = logits_nm
-    def log_prob(self, x: Array) -> Array:
-        """Compute log probability of the mixture.
+        self.stds_nm = jnp.clip(stds_nm, min=std_clip)
+        self.logits_nm = jnp.clip(logits_nm, -logit_clip, logit_clip)
-        Args:
-            x: Array of shape (...,) containing values to evaluate
-        Returns:
-            Log probabilities of shape (...,)
-        """
+    def log_prob(self, x_n: Array) -> Array:
         # Expand x to match component dimensions
-        x_expanded = x[..., None]  # Shape: (..., 1)
+        x_n_expanded = x_n[..., None]  # Shape: (..., 1)
         # Compute log probabilities for each component
         component_log_probs = (
             -0.5 * jnp.log(2 * jnp.pi)
             - jnp.log(self.stds_nm)
-            - (x_expanded - self.means_nm) ** 2 / (2 * self.stds_nm**2)
+            - (x_n_expanded - self.means_nm) ** 2 / (2 * self.stds_nm**2)
         )
         # Compute mixing weights
@@ -123,16 +131,7 @@ class MixtureOfGaussians(Distribution):
         # Combine using log-sum-exp trick for numerical stability
         return jax.scipy.special.logsumexp(component_log_probs + mixing_logits, axis=-1)
-    def sample(self, key: PRNGKeyArray) -> Array:
-        """Sample from the mixture of Gaussians.
-        Args:
-            key: PRNG key
-        Returns:
-            Samples of shape (...,) where ... are the batch dimensions
-        """
-        # Sample component indices
+    def sample(self, key: PRNGKeyArray) -> Array:  # Sample component indices
         component_key, sample_key = jax.random.split(key)
         component_indices = jax.random.categorical(component_key, self.logits_nm, axis=-1)
@@ -153,8 +152,8 @@ class MixtureOfGaussians(Distribution):
         noise = jax.random.normal(sample_key, selected_means.shape)
         # Reshape back to original batch shape
-        samples = selected_means + selected_stds * noise
-        return samples.reshape(batch_shape)
+        samples_n = selected_means + selected_stds * noise
+        return samples_n.reshape(batch_shape)
     def mode(self) -> Array:
         """Return the mode of the mixture (approximate - returns mean of highest weight component)."""

xax/task/mixins/train.py CHANGED Viewed

@@ -177,7 +177,6 @@ class TrainConfig(
     step_kind: str = field("step", help=f"How to measure a step; one of [{', '.join(get_args(StepKind))}]")
     updates_per_step: int = field(1, help="Number of updates to perform per step")
     random_seed: int = field(1337, help="Random seed for the task")
-    global_grad_clip: float = field(value=10.0, help="The maximum gradient norm to clip to.")
 Config = TypeVar("Config", bound=TrainConfig)

{xax-0.3.10.dist-info → xax-0.3.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xax
-Version: 0.3.10
+Version: 0.3.11
 Summary: A library for fast Jax experimentation
 Home-page: https://github.com/kscalelabs/xax
 Author: Benjamin Bolte

{xax-0.3.10.dist-info → xax-0.3.11.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-xax/__init__.py,sha256=lSwyrPTof_BZ-pyPNhNICJnCZMN9i2sJ-Ii3S_vY_28,16666
+xax/__init__.py,sha256=Kd9-a62JICqpaZqb0WaJMz7qC5uHYghOHZsnCb3EC6Q,16666
 xax/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/requirements-dev.txt,sha256=qkscNkFzWd1S5fump-AKH53rR65v2x5FmboFdy_kKvs,128
 xax/requirements.txt,sha256=6qY-84e-sTmlfJNrSjwONQKqzAn5h8G_oGIhnhmfSr4,302
@@ -9,7 +9,7 @@ xax/core/conf.py,sha256=d7Dp_GwKnaxtkztlSrJSM_LR0UYJX_FWTtceIWCBkxc,5138
 xax/core/state.py,sha256=_gtINsRc310Bu_HuIYsDoOKTZa6DgU2tz0IOKkdnY9Q,3813
 xax/nn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/nn/attention.py,sha256=m6yEoRqf7-wLgrEltaR6CxF_Cody0MaNtAkuKk39qJI,31176
-xax/nn/distributions.py,sha256=096IDvoJ0ZA4SqcfgNSmrICsGcsKVcTAh0Vl6SwN3-o,6343
+xax/nn/distributions.py,sha256=b251blOwdxkWUOaYjOuqcR_HNMfm9I8Aq9EDxoIxHVw,6519
 xax/nn/embeddings.py,sha256=8tAuAPdkVj-U5IwtRZKHA0WYMFRbpCuwyAxcChdKhbE,11784
 xax/nn/functions.py,sha256=bA5kJYzMtFM8eUqBC086i355zJMAO7k_vPFNSDBI9-s,2814
 xax/nn/geom.py,sha256=c9K52vLm-V-15CRqMNx0OmqsWfb3PHQxXW4OSx9kCAk,10635
@@ -43,7 +43,7 @@ xax/task/mixins/logger.py,sha256=6oXsJJyNUx6YT3q58FVXMZBUpMgjVkGre6BXFN20cVI,280
 xax/task/mixins/process.py,sha256=hqDEsMp_SL6ee97iq26-G0g49OcWZZaX82JD4F22eJU,1781
 xax/task/mixins/runnable.py,sha256=pcLrYc_TycZUY9zZim05Skc2FWk3IZKFnu6p3UDMonM,1966
 xax/task/mixins/step_wrapper.py,sha256=-Yu5Nft2CRw1JvZt6J_94SM1vqX8fk08IDK95Pmd2ew,1648
-xax/task/mixins/train.py,sha256=_kDpifLi1arSuT0ssFhBV0axpvLlQG3a97pohya0Eqc,32908
+xax/task/mixins/train.py,sha256=hwAR_G1kgvhXgrE5ZRNL4Jn-Teflx65_1bdk6aULXEg,32814
 xax/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/utils/debugging.py,sha256=OtUdu-3tQsQtik0Q9UM-SNV46IbPjwrAfZcywzoB5d4,1940
 xax/utils/experiments.py,sha256=5k5hPYSaVjzoR_nm2Q3DAHMMYi3Bcp3N3PAQbwZq7Gg,29830
@@ -60,9 +60,9 @@ xax/utils/data/collate.py,sha256=Rd9vMomr_S_zCa_Hi4dO-8ntzAfVwndIUtuXFA3iNcc,706
 xax/utils/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 xax/utils/types/frozen_dict.py,sha256=ebtHENhyUzSjyJTlbMaLtcckQIJ7EtgJiok_40TJZpo,4689
 xax/utils/types/hashable_array.py,sha256=l5iIcFmkYzfGeaZmcSoeFkthFASqM8xJYK3AXhZQYwc,992
-xax-0.3.10.dist-info/licenses/LICENSE,sha256=HCN2bImAzUOXldAZZI7JZ9PYq6OwMlDAP_PpX1HnuN0,1071
-xax-0.3.10.dist-info/METADATA,sha256=oQMGYjsfYxMmw0A60qE15yda_G-0YG5RNl17tboR1f0,1247
-xax-0.3.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-xax-0.3.10.dist-info/entry_points.txt,sha256=uRC6rx5ce0bf-FblJaZSBMxxKFfMyoWTf8OWbBmLSe8,61
-xax-0.3.10.dist-info/top_level.txt,sha256=g4Au_r2XhvZ-lTybviH-Fh9g0zF4DAYHYxPue1-xbs8,4
-xax-0.3.10.dist-info/RECORD,,
+xax-0.3.11.dist-info/licenses/LICENSE,sha256=HCN2bImAzUOXldAZZI7JZ9PYq6OwMlDAP_PpX1HnuN0,1071
+xax-0.3.11.dist-info/METADATA,sha256=FaS2TIfJ5ExcZYXP1KBugCPal5jexn_HZ5oFQCDvq9g,1247
+xax-0.3.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+xax-0.3.11.dist-info/entry_points.txt,sha256=uRC6rx5ce0bf-FblJaZSBMxxKFfMyoWTf8OWbBmLSe8,61
+xax-0.3.11.dist-info/top_level.txt,sha256=g4Au_r2XhvZ-lTybviH-Fh9g0zF4DAYHYxPue1-xbs8,4
+xax-0.3.11.dist-info/RECORD,,

{xax-0.3.10.dist-info → xax-0.3.11.dist-info}/WHEEL RENAMED Viewed

File without changes

{xax-0.3.10.dist-info → xax-0.3.11.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{xax-0.3.10.dist-info → xax-0.3.11.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{xax-0.3.10.dist-info → xax-0.3.11.dist-info}/top_level.txt RENAMED Viewed

File without changes

xax 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl

xax 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl