PyPI - xax - Versions diffs - 0.0.3__py3-none-any.whl → 0.0.6__py3-none-any.whl - Mend

xax 0.0.3 (py3-none-any.whl) → 0.0.6 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

xax/__init__.py +122 -8
xax/core/conf.py +9 -33
xax/core/state.py +13 -23
xax/nn/embeddings.py +355 -0
xax/nn/functions.py +8 -4
xax/requirements-dev.txt +9 -1
xax/requirements.txt +17 -10
xax/task/base.py +2 -6
xax/task/logger.py +419 -412
xax/task/loggers/callback.py +44 -0
xax/task/loggers/state.py +5 -18
xax/task/loggers/tensorboard.py +16 -33
xax/task/mixins/__init__.py +3 -1
xax/task/mixins/artifacts.py +19 -9
xax/task/mixins/checkpointing.py +221 -0
xax/task/mixins/compile.py +104 -0
xax/task/mixins/cpu_stats.py +26 -15
xax/task/mixins/data_loader.py +27 -19
xax/task/mixins/gpu_stats.py +22 -8
xax/task/mixins/logger.py +5 -251
xax/task/mixins/process.py +8 -1
xax/task/mixins/runnable.py +3 -0
xax/task/mixins/step_wrapper.py +5 -0
xax/task/mixins/train.py +236 -145
xax/task/script.py +1 -1
xax/task/task.py +13 -5
xax/utils/data/collate.py +6 -6
xax/utils/experiments.py +45 -1
xax/utils/logging.py +29 -0
xax/utils/tensorboard.py +89 -21
xax-0.0.6.dist-info/METADATA +50 -0
xax-0.0.6.dist-info/RECORD +52 -0
{xax-0.0.3.dist-info → xax-0.0.6.dist-info}/WHEEL +1 -1
xax/task/launchers/staged.py +0 -29
xax-0.0.3.dist-info/METADATA +0 -39
xax-0.0.3.dist-info/RECORD +0 -49
{xax-0.0.3.dist-info → xax-0.0.6.dist-info}/LICENSE +0 -0
{xax-0.0.3.dist-info → xax-0.0.6.dist-info}/top_level.txt +0 -0

xax/task/logger.py CHANGED Viewed

@@ -11,16 +11,21 @@ captions to images, and so on.
 import functools
 import logging
+import math
+import re
 import time
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from dataclasses import dataclass
 from types import TracebackType
-from typing import Callable, Literal, Self, Sequence, TypeVar, get_args
+from typing import Callable, Iterator, Literal, Self, Sequence, TypeVar, get_args
+import jax
+import jax.numpy as jnp
 import numpy as np
 from jaxtyping import Array
-from omegaconf import DictConfig
+from PIL import Image, ImageDraw, ImageFont
+from PIL.Image import Image as PILImage
 from xax.core.state import Phase, State
 from xax.utils.experiments import IntervalTicker
@@ -34,15 +39,58 @@ Number = int | float | Array | np.ndarray
 ChannelSelectMode = Literal["first", "last", "mean"]
-VALID_VIDEO_CHANNEL_COUNTS = {1, 3}
-VALID_AUDIO_CHANNEL_COUNTS = {1, 2}
-TARGET_FPS = 12
 DEFAULT_NAMESPACE = "value"
 NAMESPACE_STACK: list[str] = []
+def standardize_text(text: str, max_line_length: int | None = None, remove_non_ascii: bool = False) -> list[str]:
+    """Standardizes a text string to a list of lines.
+    Whitespace runs inside each line are collapsed to a single space, and
+    consecutive line breaks are treated as a single break.
+    Args:
+        text: The text to standardize
+        max_line_length: If set, wrap lines longer than this length into
+            consecutive chunks of at most this many characters; no text is
+            discarded
+        remove_non_ascii: Remove non-ASCII characters if present
+    Returns:
+        The standardized text lines
+    Raises:
+        ValueError: If ``max_line_length`` is not a positive integer
+    """
+    # A zero length would make ``range`` fail with an unrelated message, and
+    # a negative one would silently drop every line, so reject both up front.
+    if max_line_length is not None and max_line_length <= 0:
+        raise ValueError(f"max_line_length must be positive, got {max_line_length}")
+    def _chunk_lines(text: str, max_length: int) -> Iterator[str]:
+        for i in range(0, len(text), max_length):
+            yield text[i : i + max_length]
+    if remove_non_ascii:
+        text = "".join(char for char in text if ord(char) < 128)
+    lines = [re.sub(r"\s+", " ", line) for line in re.split(r"[\n\r]+", text.strip())]
+    if max_line_length is not None:
+        lines = [subline for line in lines for subline in _chunk_lines(line, max_line_length)]
+    return lines
+def make_human_viewable_resolution(
+    image: PILImage,
+    interpolation: Image.Resampling = Image.Resampling.LANCZOS,
+    trg_res: tuple[int, int] = (512, 512),
+) -> PILImage:
+    """Resizes image to human-viewable resolution.
+    Both sides are scaled by the same factor, so the aspect ratio is kept
+    (up to integer rounding).
+    Args:
+        image: The PIL image to resize
+        interpolation: Interpolation mode to use for image resizing
+        trg_res: The target image resolution as (height, width); the image
+            will be rescaled to have approximately the same area as an image
+            with this resolution
+    Returns:
+        The resized image, or the input image unchanged if it has no pixels
+    """
+    width, height = image.size
+    # An empty image has no area to scale (and would divide by zero below).
+    if width == 0 or height == 0:
+        return image
+    trg_height, trg_width = trg_res
+    factor = math.sqrt((trg_height * trg_width) / (height * width))
+    # Truncation can round a very thin side down to zero, which is not a
+    # valid size to resize to, so keep every side at least one pixel long.
+    new_height, new_width = max(1, int(height * factor)), max(1, int(width * factor))
+    return image.resize((new_width, new_height), interpolation)
 class namespace_context:  # noqa: N801
     def __init__(self, name: str | None) -> None:
         self._name = name
@@ -62,45 +110,134 @@ class namespace_context:  # noqa: N801
             NAMESPACE_STACK.pop()
-@dataclass
-class LogImage:
-    pixels: Array
+def normalize(x: np.ndarray) -> np.ndarray:
+    """Linearly rescales an array so that its values span [0, 1].
+    Args:
+        x: The array to rescale
+    Returns:
+        The rescaled array; if every value of ``x`` is identical the result
+        is all zeros rather than the NaNs a division by zero would produce
+    """
+    x_min = x.min()
+    value_range = x.max() - x_min
+    # A constant array has zero range; divide by one instead so the output
+    # stays finite while keeping the same result dtype as the general case.
+    scale = value_range if value_range != 0 else 1
+    return (x - x_min) / scale
+def ternary_search_optimal_side_counts(height: int, width: int, count: int) -> tuple[int, int]:
+    """Picks how many equally-sized images to place along each side of a tiling.
+    Every candidate is a pair of factors of ``count``, so the grid is always
+    completely filled. The chosen pair maximizes the shorter side of the
+    tiled canvas.
+    Args:
+        height: The height of a single image
+        width: The width of a single image
+        count: The number of images to tile
+    Returns:
+        The number of images along the height and along the width
+    Raises:
+        ValueError: If ``count`` is less than one
+    """
+    if count < 1:
+        raise ValueError(f"Expected at least one image to tile, got {count}")
+    # Only divisors up to floor(sqrt(count)) belong here. Using the ceiling
+    # instead lets divisors above the square root in, which duplicates pairs
+    # and breaks the single-valley ordering that ternary search relies on.
+    min_factors = [i for i in range(1, math.isqrt(count) + 1) if count % i == 0]
+    max_factors = [i for i in min_factors[::-1] if i * i != count]
+    # Ordered by increasing count along the height (decreasing along the width).
+    factors = [(i, count // i) for i in min_factors] + [(count // i, i) for i in max_factors]
+    lo, hi = 0, len(factors) - 1
+    def penalty(i: int) -> float:
+        hval, wval = factors[i]
+        h, w = hval * height, wval * width
+        return -(min(h, w) ** 2)
+    # Runs ternary search to minimize penalty.
+    while lo < hi - 2:
+        lmid, rmid = (lo * 2 + hi) // 3, (lo + hi * 2) // 3
+        if penalty(lmid) > penalty(rmid):
+            lo = lmid
+        else:
+            hi = rmid
+    # Returns the lowest-penalty configuration among the (at most three)
+    # remaining candidates.
+    mid = (lo + hi) // 2
+    plo, pmid, phi = penalty(lo), penalty(mid), penalty(hi)
+    if pmid <= plo and pmid <= phi:
+        return factors[mid]
+    elif plo <= phi:
+        return factors[lo]
+    else:
+        return factors[hi]
+def tile_images_different_sizes(images: list[PILImage], sep: int) -> PILImage:
+    """Lays out images of possibly differing sizes side by side in one row.
+    Args:
+        images: The images to tile.
+        sep: The separation between adjacent images.
+    Returns:
+        The tiled image.
+    """
+    canvas_width = sum(img.width for img in images) + (len(images) - 1) * sep
+    canvas_height = max(img.height for img in images)
+    canvas = Image.new("RGB", (canvas_width, canvas_height))
+    offset = 0
+    for img in images:
+        # Every image is pasted flush with the top edge of the canvas.
+        canvas.paste(img, (offset, 0))
+        offset += img.width + sep
+    return canvas
-@dataclass
-class LogAudio:
-    frames: Array
-    sample_rate: int
+def tile_images(images: list[PILImage], sep: int = 0) -> PILImage:
+    """Arranges a list of images on a single canvas.
+    Equally-sized images are placed on a grid; images of differing sizes are
+    handed to ``tile_images_different_sizes`` instead.
+    Args:
+        images: The images to tile.
+        sep: The separation between adjacent images.
-@dataclass
+    Returns:
+        The tiled image, or an empty RGB image if no images are given.
+    """
+    if not images:
+        return Image.new("RGB", (0, 0))
+    # Mixed sizes cannot share a regular grid.
+    first_size = images[0].size
+    if any(img.size != first_size for img in images):
+        return tile_images_different_sizes(images, sep)
+    cell_width, cell_height = first_size
+    rows, cols = ternary_search_optimal_side_counts(cell_height, cell_width, len(images))
+    # Pastes the images row by row, leaving ``sep`` pixels between cells.
+    canvas_size = (cols * cell_width + (cols - 1) * sep, rows * cell_height + (rows - 1) * sep)
+    canvas = Image.new("RGB", canvas_size)
+    for index, img in enumerate(images):
+        row, col = divmod(index, cols)
+        canvas.paste(img, (col * (cell_width + sep), row * (cell_height + sep)))
+    return canvas
+def as_numpy(array: Array) -> np.ndarray:
+    """Fetches an array with ``jax.device_get`` and returns it as a NumPy array.
+    Floating-point inputs are cast to float32 and integer inputs to int32;
+    boolean inputs stay boolean and other dtypes are left untouched.
+    Args:
+        array: The array to convert
+    Returns:
+        The converted NumPy array
+    """
+    fetched = jax.device_get(array)
+    # First matching dtype family wins, mirroring an if/elif chain.
+    casts = ((jnp.floating, jnp.float32), (jnp.integer, jnp.int32), (jnp.bool_, jnp.bool_))
+    for family, target in casts:
+        if jax.dtypes.issubdtype(fetched.dtype, family):
+            fetched = fetched.astype(target)
+            break
+    return np.array(fetched)
+@dataclass(kw_only=True)
+class LogImage:
+    """Container for a single image to be logged."""
+    # The image, held as a PIL image; the field must be passed by keyword.
+    image: PILImage
+@dataclass(kw_only=True)
 class LogVideo:
-    frames: Array
+    """Container for video data and metadata.
+    Attributes:
+        frames: Video frames as a numpy array of shape (T,H,W,C)
+        fps: Frames per second
+    """
-@dataclass
-class LogPointCloud:
-    xyz: Array
-    colors: Array | None
+    frames: np.ndarray
+    fps: int
-@dataclass
+@dataclass(kw_only=True)
 class LogLine:
     state: State
     scalars: dict[str, dict[str, Number]]
     strings: dict[str, dict[str, str]]
     images: dict[str, dict[str, LogImage]]
-    audios: dict[str, dict[str, LogAudio]]
     videos: dict[str, dict[str, LogVideo]]
-    point_cloud: dict[str, dict[str, LogPointCloud]]
-@dataclass
+@dataclass(kw_only=True)
 class LogErrorSummary:
     message: str
-@dataclass
+@dataclass(kw_only=True)
 class LogError:
     message: str
     location: str | None = None
@@ -113,7 +250,7 @@ class LogError:
         return message
-@dataclass
+@dataclass(kw_only=True)
 class LogStatus:
     message: str
     created: float
@@ -121,7 +258,7 @@ class LogStatus:
     lineno: int | None = None
-@dataclass
+@dataclass(kw_only=True)
 class LogPing:
     message: str
     created: float
@@ -129,6 +266,120 @@ class LogPing:
     lineno: int | None = None
+def get_image(image: np.ndarray | Array | PILImage, target_resolution: tuple[int, int] | None = None) -> LogImage:
+    """Converts an image in any supported format to a ``LogImage``.
+    Args:
+        image: The image to convert; either a PIL image, or a NumPy or JAX
+            array with shape (H, W), (H, W, C) or (C, H, W), where C is 1 or
+            3. Arrays must be floating point or uint8; floating point values
+            are rescaled so that they span the full 0-255 range
+        target_resolution: If set, rescale the image to approximately the
+            area of this resolution
+    Returns:
+        The image wrapped in a ``LogImage``
+    Raises:
+        ValueError: If the image type, dtype or channel layout is unsupported
+        RuntimeError: If an array image is neither 2D nor 3D
+    """
+    if not isinstance(image, (np.ndarray, Array, PILImage)):
+        raise ValueError(f"Unsupported image type: {type(image)}")
+    if isinstance(image, Array):
+        # ``as_numpy`` widens every integer dtype to int32, which the dtype
+        # check below would reject, so uint8 pixel data is fetched as-is.
+        if image.dtype == np.uint8:
+            image = np.asarray(jax.device_get(image))
+        else:
+            image = as_numpy(image)
+    if isinstance(image, np.ndarray):
+        if image.ndim == 2:
+            image = np.expand_dims(image, axis=-1)
+        if image.ndim != 3:
+            raise RuntimeError(f"Expected image to have shape HW, HWC, or CHW, got {image.shape}")
+        # Normalizes the image and converts to integer.
+        if np.issubdtype(image.dtype, np.floating):
+            image = (normalize(image) * 255).round().astype(np.uint8)
+        elif image.dtype == np.uint8:
+            pass
+        else:
+            raise ValueError(f"Unsupported image dtype: {image.dtype}")
+        # Converts to a PIL image, preferring the channels-last reading when
+        # both layouts would be valid.
+        if image.shape[-1] == 1:
+            image = Image.fromarray(image[..., 0])
+        elif image.shape[-1] == 3:
+            image = Image.fromarray(image)
+        elif image.shape[0] == 1:
+            image = Image.fromarray(image[0])
+        elif image.shape[0] == 3:
+            image = Image.fromarray(image.transpose(1, 2, 0))
+        else:
+            raise ValueError(f"Unsupported image shape: {image.shape}")
+    if target_resolution is not None:
+        image = make_human_viewable_resolution(image, trg_res=target_resolution)
+    return LogImage(image=image)
+def image_with_text(
+    image: PILImage,
+    text: list[str],
+    max_num_lines: int | None,
+    line_spacing: int,
+    centered: bool,
+) -> LogImage:
+    """Adds a text label below an image.
+    Args:
+        image: The PIL image to label
+        text: The lines of the text label for the image
+        max_num_lines: The number of lines of space to reserve below the
+            image; label lines beyond this count are dropped. If None,
+            reserve exactly enough space for every line
+        line_spacing: The spacing between adjacent lines
+        centered: If set, center the text labels, otherwise align to the left
+    Returns:
+        The image with a text label, or the unmodified image if there is no
+        text left to draw
+    """
+    if max_num_lines is None:
+        max_num_lines = len(text)
+    else:
+        text = text[:max_num_lines]
+    # Checked after truncation so that ``max_num_lines=0`` is handled here
+    # instead of failing on ``text[0]`` below.
+    if not text:
+        return LogImage(image=image)
+    width, height = image.size
+    font: ImageFont.ImageFont = ImageFont.load_default()
+    # The bottom of the first line's bounding box is used as the height of
+    # every line.
+    _, _, _, line_height = font.getbbox(text[0])
+    new_width, new_height = width, height + line_spacing + max_num_lines * (line_height + line_spacing)
+    # Named colors are resolved for the image's own mode, whereas a bare
+    # integer is only a proper color for single-band images.
+    padded_image = Image.new(image.mode, (new_width, new_height), "white")
+    padded_image.paste(image, (0, 0))
+    drawer = ImageDraw.Draw(padded_image)
+    for i, text_line in enumerate(text):
+        text_line_top = height + line_spacing + i * (line_height + line_spacing)
+        if centered:
+            _, _, line_width, _ = font.getbbox(text_line)
+            text_line_left = (width - line_width) / 2
+            drawer.text((text_line_left, text_line_top), text_line, font=font, fill="black")
+        else:
+            drawer.text((line_spacing, text_line_top), text_line, font=font, fill="black")
+    return LogImage(image=padded_image)
+def get_video(video: np.ndarray | Array, fps: int = 30) -> LogVideo:
+    """Converts video data to standard format.
+    Args:
+        video: The video frames. Can be:
+            - A numpy array of shape (T, H, W, C) or (T, C, H, W)
+            - A JAX array of shape (T, H, W, C) or (T, C, H, W)
+            Frames must be floating point or uint8; floating point values
+            are rescaled so that they span the full 0-255 range
+        fps: Frames per second
+    Returns:
+        LogVideo containing standardized video frames
+    Raises:
+        ValueError: If the video type, rank or dtype is unsupported
+    """
+    if isinstance(video, Array):
+        # ``as_numpy`` widens every integer dtype to int32, which the dtype
+        # check below would reject, so uint8 frames are fetched as-is.
+        if video.dtype == np.uint8:
+            video = np.asarray(jax.device_get(video))
+        else:
+            video = as_numpy(video)
+    if not isinstance(video, np.ndarray):
+        raise ValueError(f"Unsupported video type: {type(video)}")
+    # Handle different dimension orderings
+    if video.ndim != 4:
+        raise ValueError(f"Expected video array of shape (T, H, W, C) or (T, C, H, W), got shape {video.shape}")
+    # A size-3 second axis is taken to be the channel axis.
+    if video.shape[1] == 3:  # (T,C,H,W) format
+        video = video.transpose(0, 2, 3, 1)
+    # Normalize and convert to uint8 if needed
+    if np.issubdtype(video.dtype, np.floating):
+        video = (normalize(video) * 255).round().astype(np.uint8)
+    elif video.dtype != np.uint8:
+        raise ValueError(f"Unsupported video dtype: {video.dtype}")
+    return LogVideo(frames=video, fps=fps)
 class LoggerImpl(ABC):
     def __init__(self, log_interval_seconds: float = 1.0) -> None:
         """Defines some default behavior for loggers.
@@ -187,25 +438,12 @@ class LoggerImpl(ABC):
             ping: The ping to write.
         """
-    def log_git_state(self, git_state: str) -> None:
-        """Logs Git state for the current run.
-        Args:
-            git_state: The Git state, as text blocks.
-        """
-    def log_training_code(self, training_code: str) -> None:
-        """Logs the training script code.
+    def log_file(self, name: str, contents: str) -> None:
+        """Logs a large text file.
         Args:
-            training_code: The training script code.
-        """
-    def log_config(self, config: DictConfig) -> None:
-        """Logs the configuration for the current run.
-        Args:
-            config: The configuration, as a DictConfig.
+            name: The name of the file.
+            contents: The contents of the file.
         """
     def should_log(self, state: State) -> bool:
@@ -260,11 +498,8 @@ class Logger:
     def __init__(self, default_namespace: str = DEFAULT_NAMESPACE) -> None:
         self.scalars: dict[str, dict[str, Callable[[], Number]]] = defaultdict(dict)
         self.strings: dict[str, dict[str, Callable[[], str]]] = defaultdict(dict)
-        self.images: dict[str, dict[str, Callable[[], Array]]] = defaultdict(dict)
-        self.audio: dict[str, dict[str, Callable[[], tuple[Array, int]]]] = defaultdict(dict)
-        self.videos: dict[str, dict[str, Callable[[], Array]]] = defaultdict(dict)
-        self.histograms: dict[str, dict[str, Callable[[], Array]]] = defaultdict(dict)
-        self.point_clouds: dict[str, dict[str, Callable[[], tuple[Array, Array | None]]]] = defaultdict(dict)
+        self.images: dict[str, dict[str, Callable[[], LogImage]]] = defaultdict(dict)
+        self.videos: dict[str, dict[str, Callable[[], LogVideo]]] = defaultdict(dict)
         self.default_namespace = default_namespace
         self.loggers: list[LoggerImpl] = []
@@ -272,6 +507,9 @@ class Logger:
         root_logger = logging.getLogger()
         ToastHandler(self).add_for_logger(root_logger)
+        # Flag when the logger is active.
+        self.active = False
     def add_logger(self, *logger: LoggerImpl) -> None:
         """Add the logger, so that it gets called when `write` is called.
@@ -285,20 +523,15 @@ class Logger:
             state=state,
             scalars={k: {kk: v() for kk, v in v.items()} for k, v in self.scalars.items()},
             strings={k: {kk: v() for kk, v in v.items()} for k, v in self.strings.items()},
-            images={k: {kk: LogImage(v()) for kk, v in v.items()} for k, v in self.images.items()},
-            audios={k: {kk: LogAudio(*v()) for kk, v in v.items()} for k, v in self.audio.items()},
-            videos={k: {kk: LogVideo(v()) for kk, v in v.items()} for k, v in self.videos.items()},
-            point_cloud={k: {kk: LogPointCloud(*v()) for kk, v in v.items()} for k, v in self.point_clouds.items()},
+            images={k: {kk: v() for kk, v in v.items()} for k, v in self.images.items()},
+            videos={k: {kk: v() for kk, v in v.items()} for k, v in self.videos.items()},
         )
     def clear(self) -> None:
         self.scalars.clear()
         self.strings.clear()
         self.images.clear()
-        self.audio.clear()
         self.videos.clear()
-        self.histograms.clear()
-        self.point_clouds.clear()
     def write(self, state: State) -> None:
         """Writes the current step's logging information.
@@ -317,11 +550,11 @@ class Logger:
     def write_error_summary(self, error_summary: str) -> None:
         for logger in self.loggers:
-            logger.write_error_summary(LogErrorSummary(error_summary))
+            logger.write_error_summary(LogErrorSummary(message=error_summary))
     def write_error(self, message: str, location: str | None = None) -> None:
         for logger in self.loggers:
-            logger.write_error(LogError(message, location))
+            logger.write_error(LogError(message=message, location=location))
     def write_status(
         self,
@@ -330,7 +563,12 @@ class Logger:
         lineno: int | None = None,
         created: float | None = None,
     ) -> None:
-        status = LogStatus(message, time.time() if created is None else created, filename, lineno)
+        status = LogStatus(
+            message=message,
+            created=time.time() if created is None else created,
+            filename=filename,
+            lineno=lineno,
+        )
         for logger in self.loggers:
             logger.write_status(status)
@@ -341,7 +579,12 @@ class Logger:
         lineno: int | None = None,
         created: float | None = None,
     ) -> None:
-        ping = LogPing(message, time.time() if created is None else created, filename, lineno)
+        ping = LogPing(
+            message=message,
+            created=time.time() if created is None else created,
+            filename=filename,
+            lineno=lineno,
+        )
         for logger in self.loggers:
             logger.write_ping(ping)
@@ -356,8 +599,13 @@ class Logger:
             value: The scalar value being logged
             namespace: An optional logging namespace
         """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
         namespace = self.resolve_namespace(namespace)
+        if isinstance(value, jnp.ndarray):
+            assert value.ndim == 0, f"Scalar must be a 0D array, got shape {value.shape}"
         @functools.lru_cache(maxsize=None)
         def scalar_future() -> Number:
             return value() if callable(value) else value
@@ -372,6 +620,8 @@ class Logger:
             value: The string value being logged
             namespace: An optional logging namespace
         """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
         namespace = self.resolve_namespace(namespace)
         @functools.lru_cache(maxsize=None)
@@ -383,71 +633,87 @@ class Logger:
     def log_image(
         self,
         key: str,
-        value: Callable[[], Array] | Array,
+        value: Callable[[], np.ndarray | Array | PILImage] | np.ndarray | Array | PILImage,
         *,
         namespace: str | None = None,
-        keep_resolution: bool = False,
+        target_resolution: tuple[int, int] | None = (512, 512),
     ) -> None:
         """Logs an image.
         Args:
             key: The key being logged
-            value: The image being logged; can be (C, H, W), (H, W, C) or (H, W)
-                as an RGB (3 channel) or grayscale (1 channel) image
+            value: The image being logged
             namespace: An optional logging namespace
-            keep_resolution: If set, keep the image resolution the same,
-                otherwise upscale or downscale the image to a standard
-                resolution
+            target_resolution: The target resolution for each image; if None,
+                don't resample the images
         """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
         namespace = self.resolve_namespace(namespace)
         @functools.lru_cache(maxsize=None)
-        def image_future() -> Array:
-            raise NotImplementedError
+        def image_future() -> LogImage:
+            return get_image(value() if callable(value) else value, target_resolution)
         self.images[namespace][key] = image_future
     def log_labeled_image(
         self,
         key: str,
-        value: Callable[[], tuple[Array, str]] | tuple[Array, str],
+        value: Callable[[], tuple[np.ndarray | Array | PILImage, str]] | tuple[np.ndarray | Array | PILImage, str],
         *,
         namespace: str | None = None,
         max_line_length: int | None = None,
-        keep_resolution: bool = False,
+        max_num_lines: int | None = None,
+        target_resolution: tuple[int, int] | None = (512, 512),
+        line_spacing: int = 2,
         centered: bool = True,
     ) -> None:
         """Logs an image with a label.
         Args:
             key: The key being logged
-            value: The image and label being logged; the image can be (C, H, W),
-                (H, W, C) or (H, W) as an RGB (3 channel) or grayscale
-                (1 channel) image
+            value: The image and label being logged
             namespace: An optional logging namespace
-            max_line_length: Labels longer than this length are wrapped around
-            keep_resolution: If set, keep the image resolution the same,
-                otherwise upscale or downscale the image to a standard
-                resolution
-            centered: If set, center the text labels, otherwise align to the
-                left
+            max_line_length: The maximum line length for the label
+            max_num_lines: The number of lines of spacing to add to the bottom
+                of the image
+            target_resolution: The target resolution for each image; if None,
+                don't resample the images
+            line_spacing: The spacing between adjacent lines
+            centered: If set, center the text labels, otherwise align to the left
         """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
         namespace = self.resolve_namespace(namespace)
         @functools.lru_cache(maxsize=None)
-        def labeled_image_future() -> Array:
-            raise NotImplementedError
+        def image_future() -> LogImage:
+            image, label = value() if callable(value) else value
+            image = get_image(image, target_resolution)
+            return image_with_text(
+                image.image,
+                standardize_text(label, max_line_length),
+                max_num_lines=max_num_lines,
+                line_spacing=line_spacing,
+                centered=centered,
+            )
-        self.images[namespace][key] = labeled_image_future
+        self.images[namespace][key] = image_future
     def log_images(
         self,
         key: str,
-        value: Callable[[], Array] | Array,
+        value: (
+            Callable[[], Sequence[np.ndarray | Array | PILImage] | np.ndarray | Array]
+            | Sequence[np.ndarray | Array | PILImage]
+            | np.ndarray
+            | Array
+        ),
         *,
         namespace: str | None = None,
-        keep_resolution: bool = False,
         max_images: int | None = None,
+        target_resolution: tuple[int, int] | None = (256, 256),
         sep: int = 0,
     ) -> None:
         """Logs a set of images.
@@ -456,35 +722,49 @@ class Logger:
         Args:
             key: The key being logged
-            value: The images being logged; can be (B, C, H, W), (B, H, W, C)
-                or (B H, W) as an RGB (3 channel) or grayscale (1 channel) image
+            value: The images being logged
             namespace: An optional logging namespace
-            keep_resolution: If set, keep the image resolution the same,
-                otherwise upscale or downscale the image to a standard
-                resolution
             max_images: The maximum number of images to show; extra images
                 are clipped
+            target_resolution: The target resolution for each image; if None,
+                don't resample the images
             sep: An optional separation amount between adjacent images
         """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
         namespace = self.resolve_namespace(namespace)
         @functools.lru_cache(maxsize=None)
-        def images_future() -> Array:
-            raise NotImplementedError
+        def images_future() -> LogImage:
+            images = value() if callable(value) else value
+            if max_images is not None:
+                images = images[:max_images]
+            if isinstance(images, Array):
+                images = as_numpy(images)
+            if isinstance(images, Sequence):
+                images = list(images)
+            images = [get_image(image, target_resolution) for image in images]
+            tiled = tile_images([img.image for img in images], sep)
+            return LogImage(image=tiled)
         self.images[namespace][key] = images_future
     def log_labeled_images(
         self,
         key: str,
-        value: Callable[[], tuple[Array, Sequence[str]]] | tuple[Array, Sequence[str]],
+        value: (
+            Callable[[], tuple[Sequence[np.ndarray | Array | PILImage] | np.ndarray | Array, Sequence[str]]]
+            | tuple[Sequence[np.ndarray | Array | PILImage] | np.ndarray | Array, Sequence[str]]
+        ),
         *,
         namespace: str | None = None,
-        max_line_length: int | None = None,
-        keep_resolution: bool = False,
         max_images: int | None = None,
-        sep: int = 0,
+        max_line_length: int | None = None,
+        max_num_lines: int | None = None,
+        target_resolution: tuple[int, int] | None = (256, 256),
+        line_spacing: int = 2,
         centered: bool = True,
+        sep: int = 0,
     ) -> None:
         """Logs a set of images with labels.
@@ -492,353 +772,79 @@ class Logger:
         Args:
             key: The key being logged
-            value: The images and labels being logged; images can be
-                (B, C, H, W), (B, H, W, C) or (B, H, W) as an RGB (3 channel)
-                or grayscale (1 channel) image, with exactly B labels
+            value: The images and labels being logged
             namespace: An optional logging namespace
-            max_line_length: Labels longer than this length are wrapped around
-            keep_resolution: If set, keep the image resolution the same,
-                otherwise upscale or downscale the image to a standard
-                resolution
             max_images: The maximum number of images to show; extra images
                 are clipped
+            max_line_length: The maximum line length for the label
+            max_num_lines: The number of lines of spacing to add to the bottom
+                of the image
+            target_resolution: The target resolution for each image; if None,
+                don't resample the images
+            line_spacing: The spacing between adjacent lines
+            centered: If set, center the text labels, otherwise align to the left
             sep: An optional separation amount between adjacent images
-            centered: If set, center the text labels, otherwise align to the
-                left
-        """
-        namespace = self.resolve_namespace(namespace)
-        @functools.lru_cache(maxsize=None)
-        def labeled_images_future() -> Array:
-            raise NotImplementedError
-        self.images[namespace][key] = labeled_images_future
-    def log_audio(
-        self,
-        key: str,
-        value: Callable[[], Array] | Array,
-        *,
-        namespace: str | None = None,
-        sample_rate: int = 44100,
-        log_spec: bool = False,
-        n_fft_ms: float = 32.0,
-        hop_length_ms: float | None = None,
-        channel_select_mode: ChannelSelectMode = "first",
-        keep_resolution: bool = False,
-    ) -> None:
-        """Logs an audio clip.
-        Args:
-            key: The key being logged
-            value: The audio clip being logged; can be (C, T) or (T) as
-                a mono (1 channel) or stereo (2 channel) audio clip
-            namespace: An optional logging namespace
-            sample_rate: The sample rate of the audio clip
-            log_spec: If set, also log the spectrogram
-            n_fft_ms: FFT size, in milliseconds
-            hop_length_ms: The FFT hop length, in milliseconds
-            channel_select_mode: How to select the channel if the audio is
-                stereo; can be "first", "last", or "mean"; this is only used
-                for the spectrogram
-            keep_resolution: If set, keep the resolution of the
-                spectrogram; otherwise, make human-viewable
         """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
         namespace = self.resolve_namespace(namespace)
         @functools.lru_cache(maxsize=None)
-        def raw_audio_future() -> Array:
-            raise NotImplementedError
+        def images_future() -> LogImage:
+            images, labels = value() if callable(value) else value
+            if max_images is not None:
+                images = images[:max_images]
+                labels = labels[:max_images]
+            images = [get_image(image, target_resolution) for image in images]
+            labeled = [
+                image_with_text(
+                    img.image,
+                    standardize_text(label, max_line_length),
+                    max_num_lines=max_num_lines,
+                    line_spacing=line_spacing,
+                    centered=centered,
+                )
+                for img, label in zip(images, labels)
+            ]
+            tiled = tile_images([img.image for img in labeled], sep)
+            return LogImage(image=tiled)
-        @functools.lru_cache(maxsize=None)
-        def audio_future() -> tuple[Array, int]:
-            raise NotImplementedError
-        self.audio[namespace][key] = audio_future
-        if log_spec:
-            # Using a unique key for the spectrogram is very important because
-            # otherwise Tensorboard will have some issues.
-            self.log_spectrogram(
-                key=f"{key}_spec",
-                value=raw_audio_future,
-                namespace=namespace,
-                sample_rate=sample_rate,
-                n_fft_ms=n_fft_ms,
-                hop_length_ms=hop_length_ms,
-                channel_select_mode=channel_select_mode,
-                keep_resolution=keep_resolution,
-            )
-    def log_audios(
-        self,
-        key: str,
-        value: Callable[[], Array] | Array,
-        *,
-        namespace: str | None = None,
-        sep_ms: float = 0.0,
-        max_audios: int | None = None,
-        sample_rate: int = 44100,
-        log_spec: bool = False,
-        n_fft_ms: float = 32.0,
-        hop_length_ms: float | None = None,
-        channel_select_mode: ChannelSelectMode = "first",
-        spec_sep: int = 0,
-        keep_resolution: bool = False,
-    ) -> None:
-        """Logs multiple audio clips.
-        Args:
-            key: The key being logged
-            value: The audio clip being logged; can be (B, C, T) or (B, T) as
-                a mono (1 channel) or stereo (2 channel) audio clip, with
-                exactly B clips
-            namespace: An optional logging namespace
-            sep_ms: An optional separation amount between adjacent audio clips
-            max_audios: An optional maximum number of audio clips to log
-            sample_rate: The sample rate of the audio clip
-            log_spec: If set, also log the spectrogram
-            n_fft_ms: FFT size, in milliseconds
-            hop_length_ms: The FFT hop length, in milliseconds
-            channel_select_mode: How to select the channel if the audio is
-                stereo; can be "first", "last", or "mean"; this is only used
-                for the spectrogram
-            spec_sep: An optional separation amount between adjacent
-                spectrograms
-            keep_resolution: If set, keep the resolution of the
-                spectrogram; otherwise, make human-viewable
-        """
-        namespace = self.resolve_namespace(namespace)
-        @functools.lru_cache(maxsize=None)
-        def raw_audio_future() -> Array:
-            raise NotImplementedError
-        @functools.lru_cache(maxsize=None)
-        def audio_future() -> tuple[Array, int]:
-            raise NotImplementedError
-        self.audio[namespace][key] = audio_future
-        if log_spec:
-            # Using a unique key for the spectrogram is very important because
-            # otherwise Tensorboard will have some issues.
-            self.log_spectrograms(
-                key=f"{key}_spec",
-                value=raw_audio_future,
-                namespace=namespace,
-                max_audios=max_audios,
-                sample_rate=sample_rate,
-                n_fft_ms=n_fft_ms,
-                hop_length_ms=hop_length_ms,
-                channel_select_mode=channel_select_mode,
-                spec_sep=spec_sep,
-                keep_resolution=keep_resolution,
-            )
-    def log_spectrogram(
-        self,
-        key: str,
-        value: Callable[[], Array] | Array,
-        *,
-        namespace: str | None = None,
-        sample_rate: int = 44100,
-        n_fft_ms: float = 32.0,
-        hop_length_ms: float | None = None,
-        channel_select_mode: ChannelSelectMode = "first",
-        keep_resolution: bool = False,
-    ) -> None:
-        """Logs spectrograms of an audio clip.
-        Args:
-            key: The key being logged
-            value: The audio clip being logged; can be (C, T) or (T) as
-                a mono (1 channel) or stereo (2 channel) audio clip
-            namespace: An optional logging namespace
-            sample_rate: The sample rate of the audio clip
-            n_fft_ms: FFT size, in milliseconds
-            hop_length_ms: The FFT hop length, in milliseconds
-            channel_select_mode: How to select the channel if the audio is
-                stereo; can be "first", "last", or "mean"; this is only used
-                for the spectrogram
-            keep_resolution: If set, keep the resolution of the
-                spectrogram; otherwise, make human-viewable
-        """
-        namespace = self.resolve_namespace(namespace)
-        @functools.lru_cache(maxsize=None)
-        def spec_future() -> Array:
-            raise NotImplementedError
-        self.images[namespace][key] = spec_future
-    def log_spectrograms(
-        self,
-        key: str,
-        value: Callable[[], Array] | Array,
-        *,
-        namespace: str | None = None,
-        max_audios: int | None = None,
-        sample_rate: int = 44100,
-        n_fft_ms: float = 32.0,
-        hop_length_ms: float | None = None,
-        channel_select_mode: ChannelSelectMode = "first",
-        spec_sep: int = 0,
-        keep_resolution: bool = False,
-    ) -> None:
-        """Logs spectrograms of audio clips.
-        Args:
-            key: The key being logged
-            value: The audio clip being logged; can be (B, C, T) or (B, T) as
-                a mono (1 channel) or stereo (2 channel) audio clip, with
-                exactly B clips
-            namespace: An optional logging namespace
-            max_audios: An optional maximum number of audio clips to log
-            sample_rate: The sample rate of the audio clip
-            n_fft_ms: FFT size, in milliseconds
-            hop_length_ms: The FFT hop length, in milliseconds
-            channel_select_mode: How to select the channel if the audio is
-                stereo; can be "first", "last", or "mean"; this is only used
-                for the spectrogram
-            spec_sep: An optional separation amount between adjacent
-                spectrograms
-            keep_resolution: If set, keep the resolution of the
-                spectrogram; otherwise, make human-viewable
-        """
-        namespace = self.resolve_namespace(namespace)
-        @functools.lru_cache(maxsize=None)
-        def spec_future() -> Array:
-            raise NotImplementedError
+        self.images[namespace][key] = images_future
-        self.images[namespace][key] = spec_future
+    def log_file(self, name: str, contents: str) -> None:
+        for logger in self.loggers:
+            logger.log_file(name, contents)
     def log_video(
         self,
         key: str,
-        value: Callable[[], Array] | Array,
+        value: Callable[[], np.ndarray | Array] | np.ndarray | Array,
         *,
+        fps: int = 30,
         namespace: str | None = None,
-        fps: int | None = None,
-        length: float | None = None,
     ) -> None:
         """Logs a video.
         Args:
             key: The key being logged
-            value: The video being logged; the video can be (T, C, H, W),
-                (T, H, W, C) or (T, H, W) as an RGB (3 channel) or grayscale
-                (1 channel) video
+            value: The video frames. Can be:
+                - A numpy array of shape (T,H,W,C) or (T,C,H,W)
+                - A JAX array of shape (T,H,W,C) or (T,C,H,W)
+            fps: Frames per second
             namespace: An optional logging namespace
-            fps: The video frames per second
-            length: The desired video length, in seconds, at the target FPS
         """
+        if not self.active:
+            raise RuntimeError("The logger is not active")
         namespace = self.resolve_namespace(namespace)
         @functools.lru_cache(maxsize=None)
-        def video_future() -> Array:
-            raise NotImplementedError
+        def video_future() -> LogVideo:
+            return get_video(value() if callable(value) else value, fps=fps)
         self.videos[namespace][key] = video_future
-    def log_videos(
-        self,
-        key: str,
-        value: Callable[[], Array | list[Array]] | Array | list[Array],
-        *,
-        namespace: str | None = None,
-        max_videos: int | None = None,
-        sep: int = 0,
-        fps: int | None = None,
-        length: int | None = None,
-    ) -> None:
-        """Logs a set of videos.
-        Args:
-            key: The key being logged
-            value: The videos being logged; the video can be (B, T, C, H, W),
-                (B, T, H, W, C) or (B, T, H, W) as an RGB (3 channel) or
-                grayscale (1 channel) video
-            namespace: An optional logging namespace
-            max_videos: The maximum number of videos to show; extra videos
-                are clipped
-            sep: An optional separation amount between adjacent videos
-            fps: The video frames per second
-            length: The desired video length, in seconds, at the target FPS
-        """
-        namespace = self.resolve_namespace(namespace)
-        @functools.lru_cache(maxsize=None)
-        def videos_future() -> Array:
-            raise NotImplementedError
-        self.videos[namespace][key] = videos_future
-    def log_histogram(
-        self,
-        key: str,
-        value: Callable[[], Array] | Array,
-        *,
-        namespace: str | None = None,
-    ) -> None:
-        """Logs a histogram.
-        Args:
-            key: The key being logged
-            value: The values to create a histogram from, with arbitrary shape
-            namespace: An optional logging namespace
-        """
-        namespace = self.resolve_namespace(namespace)
-        @functools.lru_cache(maxsize=None)
-        def histogram_future() -> Array:
-            raise NotImplementedError
-        self.histograms[namespace][key] = histogram_future
-    def log_point_cloud(
-        self,
-        key: str,
-        value: Callable[[], Array] | Array,
-        *,
-        namespace: str | None = None,
-        max_points: int = 1000,
-        colors: Callable[[], Array] | Array | None = None,
-    ) -> None:
-        """Logs a point cloud.
-        Args:
-            key: The key being logged
-            value: The point cloud values, with shape (N, 3) or (B, ..., 3);
-                can pass multiple batches in order to show multiple point
-                clouds
-            namespace: An optional logging namespace
-            max_points: An optional maximum number of points in the point cloud
-            colors: An optional color for each point, with the same shape as
-                the point cloud
-        """
-        namespace = self.resolve_namespace(namespace)
-        @functools.lru_cache(maxsize=None)
-        def point_cloud_future() -> tuple[Array, Array | None]:
-            raise NotImplementedError
-        self.point_clouds[namespace][key] = point_cloud_future
-    def log_git_state(self, git_state: str) -> None:
-        for logger in self.loggers:
-            logger.log_git_state(git_state)
-    def log_training_code(self, training_code: str) -> None:
-        for logger in self.loggers:
-            logger.log_training_code(training_code)
-    def log_config(self, config: DictConfig) -> None:
-        for logger in self.loggers:
-            logger.log_config(config)
     def __enter__(self) -> Self:
+        self.active = True
         for logger in self.loggers:
             logger.start()
         return self
@@ -846,3 +852,4 @@ class Logger:
     def __exit__(self, _t: type[BaseException] | None, _e: BaseException | None, _tr: TracebackType | None) -> None:
         for logger in self.loggers:
             logger.stop()
+        self.active = False

xax 0.0.3__py3-none-any.whl → 0.0.6__py3-none-any.whl

xax 0.0.3py3-none-any.whl → 0.0.6py3-none-any.whl