wsi-toolbox 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,335 @@
1
+ """
2
+ Preview generation commands using Template Method Pattern
3
+ """
4
+
5
+ import h5py
6
+ import numpy as np
7
+ from PIL import Image, ImageFont
8
+ from matplotlib import pyplot as plt, colors as mcolors
9
+ from sklearn.preprocessing import MinMaxScaler
10
+ from sklearn.decomposition import PCA
11
+
12
+ from ..utils import create_frame, get_platform_font
13
+ from . import _get, _progress
14
+
15
+
16
class BasePreviewCommand:
    """
    Base class for preview commands (Template Method Pattern).

    Subclasses must implement:
    - _prepare(f, **kwargs): prepare data (frames, scores, ...)
    - _get_frame(index, data, f): return the overlay frame for one patch
    """

    def __init__(self, size: int = 64, font_size: int = 16,
                 model_name: str | None = None):
        """
        Initialize preview command.

        Args:
            size: Thumbnail patch size
            font_size: Font size for labels
            model_name: Model name (None to use global default)
        """
        self.size = size
        self.font_size = font_size
        self.model_name = _get('model_name', model_name)

    def __call__(self, hdf5_path: str, **kwargs) -> Image.Image:
        """
        Template method - shared workflow for every preview command.

        Args:
            hdf5_path: Path to HDF5 file
            **kwargs: Subclass-specific arguments

        Returns:
            PIL.Image: Thumbnail image
        """
        side = self.size

        with h5py.File(hdf5_path, 'r') as f:
            cols, rows, patch_count, patch_size = self._load_metadata(f)

            # Hook: subclass-specific preparation
            data = self._prepare(f, **kwargs)

            canvas = Image.new('RGB', (cols * side, rows * side), (0, 0, 0))

            # Shared rendering loop over all stored patches
            progress = _progress(range(patch_count))
            for idx in progress:
                coord = f['coordinates'][idx]
                raw = f['patches'][idx]

                # Hook: subclass-specific overlay (may be None)
                frame = self._get_frame(idx, data, f)

                # Map stored pixel coordinate to thumbnail grid position
                x, y = coord // patch_size * side
                tile = Image.fromarray(raw).resize((side, side))
                if frame:
                    tile.paste(frame, (0, 0), frame)
                canvas.paste(tile, (x, y, x + side, y + side))

            return canvas

    def _load_metadata(self, f: h5py.File):
        """Read the common grid metadata (cols, rows, patch_count, patch_size)."""
        return (
            f['metadata/cols'][()],
            f['metadata/rows'][()],
            f['metadata/patch_count'][()],
            f['metadata/patch_size'][()],
        )

    def _prepare(self, f: h5py.File, **kwargs):
        """
        Prepare data for rendering (implemented by subclass).

        Args:
            f: HDF5 file handle
            **kwargs: Subclass-specific arguments

        Returns:
            Any data structure needed for _get_frame()
        """
        raise NotImplementedError

    def _get_frame(self, index: int, data, f: h5py.File):
        """
        Get frame for a specific patch (implemented by subclass).

        Args:
            index: Patch index
            data: Data prepared by _prepare()
            f: HDF5 file handle

        Returns:
            PIL.Image or None: Frame overlay
        """
        raise NotImplementedError
114
+
115
+
116
class PreviewClustersCommand(BasePreviewCommand):
    """
    Generate thumbnail with cluster visualization.

    Usage:
        cmd = PreviewClustersCommand(size=64)
        image = cmd(hdf5_path='data.h5', cluster_name='test')
    """

    def _prepare(self, f: h5py.File, cluster_name: str = ''):
        """
        Load cluster labels and build one frame per label.

        Args:
            f: HDF5 file handle
            cluster_name: Cluster name suffix

        Returns:
            dict with 'clusters' and 'frames'
        """
        cluster_path = f'{self.model_name}/clusters'
        if cluster_name:
            cluster_path += f'_{cluster_name}'
        if cluster_path not in f:
            raise RuntimeError(f'{cluster_path} does not exist in HDF5 file')

        clusters = f[cluster_path][:]

        font = ImageFont.truetype(font=get_platform_font(), size=self.font_size)
        cmap = plt.get_cmap('tab20')

        # One frame per label; -1 (presumably "unassigned" — confirm with the
        # clustering writer) always gets a dark frame.
        labels = np.unique(clusters).tolist() + [-1]
        frames = {
            label: create_frame(
                self.size,
                mcolors.rgb2hex(cmap(label)[:3]) if label >= 0 else '#111',
                f'{label}',
                font,
            )
            for label in labels
        }

        return {'clusters': clusters, 'frames': frames}

    def _get_frame(self, index: int, data, f: h5py.File):
        """Return the frame matching the cluster label at *index* (None if negative)."""
        label = data['clusters'][index]
        if label < 0:
            return None
        return data['frames'][label]
160
+
161
+
162
class PreviewScoresCommand(BasePreviewCommand):
    """
    Generate thumbnail with score visualization.

    Usage:
        cmd = PreviewScoresCommand(size=64)
        image = cmd(hdf5_path='data.h5', score_name='pca')
    """

    def _prepare(self, f: h5py.File, score_name: str):
        """
        Load scores and set up the font and colormap.

        Args:
            f: HDF5 file handle
            score_name: Score dataset name

        Returns:
            dict with 'scores', 'cmap', and 'font'
        """
        scores = f[f'{self.model_name}/scores_{score_name}'][()]
        return {
            'scores': scores,
            'cmap': plt.get_cmap('viridis'),
            'font': ImageFont.truetype(font=get_platform_font(),
                                       size=self.font_size),
        }

    def _get_frame(self, index: int, data, f: h5py.File):
        """Return a frame colored and labeled by the score at *index* (None for NaN)."""
        score = data['scores'][index]
        if np.isnan(score):
            return None
        return create_frame(
            self.size,
            mcolors.rgb2hex(data['cmap'](score)[:3]),
            f'{score:.3f}',
            data['font'],
        )
201
+
202
+
203
class PreviewLatentPCACommand(BasePreviewCommand):
    """
    Generate thumbnail with latent PCA visualization

    Usage:
        cmd = PreviewLatentPCACommand(size=64)
        image = cmd(hdf5_path='data.h5', alpha=0.5)
    """

    def _prepare(self, f: h5py.File, alpha: float = 0.5):
        """
        Prepare latent PCA visualization data

        Args:
            f: HDF5 file handle
            alpha: Transparency of overlay (0.0-1.0)

        Returns:
            dict with 'overlays', 'alpha_mask' and 'latent_size'

        Raises:
            ValueError: If the latent token count is not a perfect square.
        """
        # Load latent features; assumes layout (B, L, EMB), e.g. (B, 16x16, 1024)
        # -- TODO confirm against the writer of 'latent_features'
        h = f[f'{self.model_name}/latent_features'][()]
        h = h.astype(np.float32)
        s = h.shape

        # Estimate original latent grid side: l = sqrt(L)
        latent_size = int(np.sqrt(s[1]))
        # BUG FIX: was a bare `assert`, which is stripped under `python -O`;
        # raise an explicit, descriptive error instead.
        if latent_size ** 2 != s[1]:
            raise ValueError(f'Latent length {s[1]} is not a perfect square')
        if self.size % latent_size != 0:
            print(f'WARNING: {self.size} is not divisible by {latent_size}')

        # Project every token embedding down to 3 components (used as RGB)
        pca = PCA(n_components=3)
        latent_pca = pca.fit_transform(h.reshape(s[0] * s[1], s[-1]))  # B*L, 3

        # Normalize each component to [0, 1] so it maps cleanly onto 0-255
        scaler = MinMaxScaler()
        latent_pca = scaler.fit_transform(latent_pca)

        # Reshape to per-patch grids and convert to RGB bytes
        latent_pca = latent_pca.reshape(s[0], latent_size, latent_size, 3)
        overlays = (latent_pca * 255).astype(np.uint8)  # B, l, l, 3

        # One uniform alpha mask, reused for every overlay
        alpha_mask = Image.new('L', (self.size, self.size), int(alpha * 255))

        return {'overlays': overlays, 'alpha_mask': alpha_mask, 'latent_size': latent_size}

    def _get_frame(self, index: int, data, f: h5py.File):
        """
        Get latent PCA overlay as a frame for patch at index

        Args:
            index: Patch index
            data: Data prepared by _prepare()
            f: HDF5 file handle

        Returns:
            PIL.Image: RGBA overlay image
        """
        # Upscale the l x l grid to patch size; NEAREST keeps cell edges crisp
        overlay = Image.fromarray(data['overlays'][index]).convert('RGBA')
        overlay = overlay.resize((self.size, self.size), Image.NEAREST)

        # Apply uniform alpha so the patch remains visible underneath
        overlay.putalpha(data['alpha_mask'])

        return overlay
272
+
273
+
274
class PreviewLatentClusterCommand(BasePreviewCommand):
    """
    Generate thumbnail with latent cluster visualization

    Usage:
        cmd = PreviewLatentClusterCommand(size=64)
        image = cmd(hdf5_path='data.h5', alpha=0.5)
    """

    def _prepare(self, f: h5py.File, alpha: float = 0.5):
        """
        Prepare latent cluster visualization data

        Args:
            f: HDF5 file handle
            alpha: Transparency of overlay (0.0-1.0)

        Returns:
            dict with 'overlays', 'alpha_mask' and 'latent_size'

        Raises:
            ValueError: If the latent token count is not a perfect square.
        """
        # Load per-token cluster labels; assumes layout (B, L), e.g. (B, 16x16)
        # -- TODO confirm against the writer of 'latent_clusters'
        clusters = f[f'{self.model_name}/latent_clusters'][()]
        s = clusters.shape

        # Estimate original latent grid side: l = sqrt(L)
        latent_size = int(np.sqrt(s[1]))
        # BUG FIX: was a bare `assert`, which is stripped under `python -O`;
        # raise an explicit, descriptive error instead.
        if latent_size ** 2 != s[1]:
            raise ValueError(f'Latent length {s[1]} is not a perfect square')
        if self.size % latent_size != 0:
            print(f'WARNING: {self.size} is not divisible by {latent_size}')

        # Map labels to RGBA via the categorical colormap
        cmap = plt.get_cmap('tab20')
        latent_map = cmap(clusters)
        latent_map = latent_map.reshape(s[0], latent_size, latent_size, 4)
        overlays = (latent_map * 255).astype(np.uint8)  # B, l, l, 4

        # One uniform alpha mask, reused for every overlay
        alpha_mask = Image.new('L', (self.size, self.size), int(alpha * 255))

        return {'overlays': overlays, 'alpha_mask': alpha_mask, 'latent_size': latent_size}

    def _get_frame(self, index: int, data, f: h5py.File):
        """
        Get latent cluster overlay as a frame for patch at index

        Args:
            index: Patch index
            data: Data prepared by _prepare()
            f: HDF5 file handle

        Returns:
            PIL.Image: RGBA overlay image
        """
        # Upscale the l x l grid to patch size; NEAREST keeps cell edges crisp
        overlay = Image.fromarray(data['overlays'][index]).convert('RGBA')
        overlay = overlay.resize((self.size, self.size), Image.NEAREST)

        # Apply uniform alpha so the patch remains visible underneath
        overlay.putalpha(data['alpha_mask'])

        return overlay
@@ -0,0 +1,196 @@
1
+ """
2
+ WSI to HDF5 conversion command
3
+ """
4
+
5
+ import cv2
6
+ import h5py
7
+ import numpy as np
8
+ from pydantic import BaseModel
9
+
10
+ from ..wsi_files import create_wsi_file
11
+ from ..utils.helpers import is_white_patch
12
+ from . import _config, _progress
13
+
14
+
15
class Wsi2HDF5Result(BaseModel):
    """Result of WSI to HDF5 conversion"""
    mpp: float           # effective microns-per-pixel after downscaling (original_mpp * scale)
    original_mpp: float  # microns-per-pixel reported by the source WSI
    scale: int           # integer downscale factor applied during conversion
    patch_count: int     # number of non-white patches actually stored
    patch_size: int      # side length (px) of each stored, scaled patch
    cols: int            # number of patch columns in the grid
    rows: int            # number of patch rows in the grid
    output_path: str     # path of the written HDF5 file
25
+
26
+
27
class Wsi2HDF5Command:
    """
    Convert WSI image to HDF5 format with patch extraction

    Usage:
        # Set global config once
        commands.set_default_progress('tqdm')

        # Create and run command
        cmd = Wsi2HDF5Command(patch_size=256, engine='auto')
        result = cmd(input_path='image.ndpi', output_path='output.h5')
    """

    def __init__(self,
                 patch_size: int = 256,
                 engine: str = 'auto',
                 mpp: float = 0,
                 rotate: bool = True):
        """
        Initialize WSI to HDF5 converter

        Args:
            patch_size: Size of patches to extract
            engine: WSI reader engine ('auto', 'openslide', 'tifffile', 'standard')
            mpp: Microns per pixel (for standard images)
            rotate: Whether to rotate patches 180 degrees

        Note:
            progress and verbose are controlled by global config:
            - commands.set_default_progress('tqdm')
            - commands.set_verbose(True/False)
        """
        self.patch_size = patch_size
        self.engine = engine
        self.mpp = mpp
        self.rotate = rotate

    def __call__(self, input_path: str, output_path: str) -> Wsi2HDF5Result:
        """
        Execute WSI to HDF5 conversion

        Args:
            input_path: Path to input WSI file
            output_path: Path to output HDF5 file

        Returns:
            Wsi2HDF5Result: Metadata including mpp, scale, patch_count

        Raises:
            RuntimeError: If the source mpp is outside the supported range.
        """
        wsi = create_wsi_file(input_path, engine=self.engine, mpp=self.mpp)

        # Choose an integer downscale so the effective mpp lands in a usable
        # range. BUG FIX: an mpp of exactly 0.360 previously matched neither
        # `0.360 < mpp < 0.500` nor `mpp < 0.360` and incorrectly raised; it
        # is now treated like the sub-0.360 case (scale=2).
        original_mpp = wsi.get_mpp()
        if original_mpp >= 0.500:
            raise RuntimeError(f'Invalid mpp: {original_mpp:.6f}')
        scale = 1 if original_mpp > 0.360 else 2
        mpp = original_mpp * scale

        W, H = wsi.get_original_size()
        S = self.patch_size  # patch size in the scaled (output) image
        T = S * scale        # patch size in original pixels

        x_patch_count = W // T            # grid columns
        y_patch_count = H // T            # grid rows
        width = x_patch_count * T         # source width trimmed to whole patches

        if _config.verbose and _config.progress == 'tqdm':
            print(f'Original mpp: {original_mpp:.6f}')
            print(f'Image mpp: {mpp:.6f}')
            print(f'Target resolutions: {W} x {H}')
            print(f'Obtained resolutions: {x_patch_count*S} x {y_patch_count*S}')
            print(f'Scale: {scale}')
            print(f'Patch size: {T}')
            print(f'Scaled patch size: {S}')
            print(f'Row count: {y_patch_count}')
            print(f'Col count: {x_patch_count}')

        coordinates = []

        with h5py.File(output_path, 'w') as f:
            # Write metadata
            f.create_dataset('metadata/original_mpp', data=original_mpp)
            f.create_dataset('metadata/original_width', data=W)
            f.create_dataset('metadata/original_height', data=H)
            f.create_dataset('metadata/image_level', data=0)
            f.create_dataset('metadata/mpp', data=mpp)
            f.create_dataset('metadata/scale', data=scale)
            f.create_dataset('metadata/patch_size', data=S)
            f.create_dataset('metadata/cols', data=x_patch_count)
            f.create_dataset('metadata/rows', data=y_patch_count)

            # Pre-size for the worst case (every patch kept); shrunk afterwards
            total_patches = f.create_dataset(
                'patches',
                shape=(x_patch_count * y_patch_count, S, S, 3),
                dtype=np.uint8,
                chunks=(1, S, S, 3),
                compression='gzip',
                compression_opts=9
            )

            # Extract patches row by row
            cursor = 0
            tq = _progress(range(y_patch_count))
            for row in tq:
                # Read one full-width row strip and downscale it in one pass.
                # Assumes read_region returns an H x W x 3 array -- TODO confirm.
                image = wsi.read_region((0, row * T, width, T))
                image = cv2.resize(image, (width // scale, S),
                                   interpolation=cv2.INTER_LANCZOS4)

                # Reshape the strip into per-column patches
                patches = image.reshape(1, S, x_patch_count, S, 3)  # (y, h, x, w, 3)
                patches = patches.transpose(0, 2, 1, 3, 4)          # (y, x, h, w, 3)
                patches = patches[0]

                # Filter white patches and collect valid ones
                batch = []
                for col, patch in enumerate(patches):
                    if is_white_patch(patch):
                        continue

                    if self.rotate:
                        patch = cv2.rotate(patch, cv2.ROTATE_180)
                        # Rotated slides store mirrored grid coordinates
                        coordinates.append((
                            (x_patch_count - 1 - col) * S,
                            (y_patch_count - 1 - row) * S
                        ))
                    else:
                        coordinates.append((col * S, row * S))

                    batch.append(patch)

                # BUG FIX: skip the write for all-white rows; `np.array([])`
                # has shape (0,), which does not match the dataset selection.
                if batch:
                    arr = np.array(batch)
                    total_patches[cursor:cursor + len(arr), ...] = arr
                    cursor += len(arr)

                tq.set_description(
                    f'Selected {len(batch)}/{len(patches)} patches '
                    f'(row {row}/{y_patch_count})'
                )
                tq.refresh()

            # Shrink to the actual patch count and save coordinates
            patch_count = len(coordinates)
            f.create_dataset('coordinates', data=coordinates)
            f['patches'].resize((patch_count, S, S, 3))
            f.create_dataset('metadata/patch_count', data=patch_count)

        if _config.verbose and _config.progress == 'tqdm':
            print(f'{patch_count} patches were selected.')

        return Wsi2HDF5Result(
            mpp=mpp,
            original_mpp=original_mpp,
            scale=scale,
            patch_count=patch_count,
            patch_size=S,
            cols=x_patch_count,
            rows=y_patch_count,
            output_path=output_path
        )