PyPI - wsi-toolbox - Versions diffs - 0.1.0__py3-none-any.whl - Mend

wsi-toolbox 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

wsi_toolbox/__init__.py +119 -0
wsi_toolbox/app.py +753 -0
wsi_toolbox/cli.py +485 -0
wsi_toolbox/commands/__init__.py +92 -0
wsi_toolbox/commands/clustering.py +214 -0
wsi_toolbox/commands/dzi_export.py +202 -0
wsi_toolbox/commands/patch_embedding.py +199 -0
wsi_toolbox/commands/preview.py +335 -0
wsi_toolbox/commands/wsi.py +196 -0
wsi_toolbox/exp.py +466 -0
wsi_toolbox/models.py +38 -0
wsi_toolbox/utils/__init__.py +153 -0
wsi_toolbox/utils/analysis.py +127 -0
wsi_toolbox/utils/cli.py +25 -0
wsi_toolbox/utils/helpers.py +57 -0
wsi_toolbox/utils/progress.py +206 -0
wsi_toolbox/utils/seed.py +21 -0
wsi_toolbox/utils/st.py +53 -0
wsi_toolbox/watcher.py +261 -0
wsi_toolbox/wsi_files.py +187 -0
wsi_toolbox-0.1.0.dist-info/METADATA +269 -0
wsi_toolbox-0.1.0.dist-info/RECORD +25 -0
wsi_toolbox-0.1.0.dist-info/WHEEL +4 -0
wsi_toolbox-0.1.0.dist-info/entry_points.txt +2 -0
wsi_toolbox-0.1.0.dist-info/licenses/LICENSE +21 -0

wsi_toolbox/watcher.py ADDED Viewed

@@ -0,0 +1,261 @@
+import os
+import time
+import argparse
+import asyncio
+from pathlib import Path
+from typing import Dict, Set, Callable, Optional
+from rich.console import Console
+from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
+from .utils import plot_umap
+from . import commands
+DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'uni')
+class Status:
+    PROCESSING = "PROCESSING"
+    DONE = "DONE"
+    ERROR = "ERROR"
+    @classmethod
+    def is_processing_state(cls, status: str) -> bool:
+        """状態が処理中系かどうかを判定"""
+        return status.startswith((cls.PROCESSING, cls.DONE, cls.ERROR))
+class Task:
+    REQUEST_FILE = "_ROBIEMON.txt"
+    LOG_FILE = "_ROBIEMON_LOG.txt"
+    @staticmethod
+    def parse_request_line(line: str) -> tuple[str, bool]:
+        """Parse the request line for model and rotation specifications.
+        Returns (model_name, should_rotate)"""
+        parts = [p.strip() for p in line.split(',')]
+        model_name = parts[0] if parts and parts[0] else DEFAULT_MODEL
+        should_rotate = len(parts) > 1 and parts[1].lower() == 'rotate'
+        return model_name, should_rotate
+    def __init__(self, folder:Path, options_line:str, on_complete:Optional[Callable[[Path], None]] = None):
+        self.folder = folder
+        self.options_line = options_line
+        self.model_name, self.should_rotate = self.parse_request_line(options_line)
+        self.on_complete = on_complete
+        self.wsi_files = list(folder.glob("**/*.ndpi")) + list(folder.glob("**/*.svs"))
+        self.wsi_files.sort()
+        commands.set_default_progress('tqdm')
+        commands.set_default_model(self.model_name)
+    def write_banner(self):
+        """処理開始時のバナーをログに書き込み"""
+        self.append_log("="*50)
+        self.append_log(f"Processing folder: {self.folder}")
+        self.append_log(f"Request options: {self.options_line}")
+        self.append_log(f"Parsed options:")
+        self.append_log(f"  - Model: {self.model_name} (default: {DEFAULT_MODEL})")
+        self.append_log(f"  - Rotation: {'enabled' if self.should_rotate else 'disabled'}")
+        self.append_log(f"Found {len(self.wsi_files)} WSI files:")
+        for i, wsi_file in enumerate(self.wsi_files, 1):
+            size_mb = wsi_file.stat().st_size / (1024 * 1024)
+            self.append_log(f"  {i}. {wsi_file.name} ({size_mb:.1f} MB)")
+        self.append_log("="*50)
+    def run(self):
+        try:
+            # ログファイルをクリア
+            with open(self.folder / self.LOG_FILE, "w") as f:
+                f.write("")
+            self.set_status(Status.PROCESSING)
+            self.write_banner()
+            # WSIファイルごとの処理
+            for i, wsi_file in enumerate(self.wsi_files):
+                try:
+                    self.append_log(f"Processing [{i+1}/{len(self.wsi_files )}]: {wsi_file.name}")
+                    hdf5_tmp_path = wsi_file.with_suffix('.h5.tmp')
+                    hdf5_file = wsi_file.with_suffix(".h5")
+                    # HDF5変換（既存の場合はスキップ）
+                    if not hdf5_file.exists():
+                        self.append_log("Converting to HDF5...")
+                        # Use new command pattern
+                        commands.set_default_progress('tqdm')
+                        cmd = commands.Wsi2HDF5Command(rotate=self.should_rotate)
+                        result = cmd(str(wsi_file), str(hdf5_tmp_path))
+                        os.rename(hdf5_tmp_path, hdf5_file)
+                        self.append_log("HDF5 conversion completed.")
+                    # 特徴量抽出（既存の場合はスキップ）
+                    self.append_log("Extracting features...")
+                    # Use new command pattern
+                    commands.set_default_device('cuda')
+                    emb_cmd = commands.PatchEmbeddingCommand()
+                    emb_result = emb_cmd(str(hdf5_file))
+                    self.append_log("Feature extraction completed.")
+                    # クラスタリングとUMAP生成
+                    self.append_log("Starting clustering ...")
+                    # Use new command pattern
+                    cluster_cmd = commands.ClusteringCommand(
+                        resolution=1.0,
+                        use_umap=True
+                    )
+                    cluster_result = cluster_cmd([hdf5_file])
+                    self.append_log("Clustering completed.")
+                    base = str(wsi_file.with_suffix(""))
+                    # UMAPプロット生成
+                    self.append_log("Starting UMAP generation...")
+                    umap_path = Path(f"{base}_umap.png")
+                    if not umap_path.exists():
+                        umap_embs = cluster_cmd.get_umap_embeddings()
+                        fig = plot_umap(umap_embs, cluster_cmd.total_clusters)
+                        fig.savefig(umap_path, bbox_inches='tight', pad_inches=0.5)
+                        self.append_log(f"UMAP plot completed. Saved to {os.path.basename(umap_path)}")
+                    else:
+                        self.append_log(f"UMAP plot already exists. Skipped.")
+                    # サムネイル生成
+                    self.append_log("Starting thumbnail generation...")
+                    thumb_path = Path(f"{base}_thumb.jpg")
+                    if not thumb_path.exists():
+                        # Use new command pattern
+                        preview_cmd = commands.PreviewClustersCommand(size=64)
+                        img = preview_cmd(str(hdf5_file), cluster_name='')
+                        img.save(thumb_path)
+                        self.append_log(f"Thumbnail generation completed. Saved to {thumb_path.name}")
+                    else:
+                        self.append_log(f"Thumbnail already exists. Skipped.")
+                    self.append_log("="*30)
+                except Exception as e:
+                    self.append_log(f"Error processing {wsi_file}: {str(e)}")
+                    self.set_status(Status.ERROR)
+                    if self.on_complete:
+                        self.on_complete(self.folder)
+                    return
+            self.set_status(Status.DONE)
+            self.append_log("All processing completed successfully")
+        except Exception as e:
+            self.append_log(f"Error: {str(e)}")
+        if self.on_complete:
+            self.on_complete(self.folder)
+    def set_status(self, status: str):
+        self.status = status
+        with open(self.folder / self.REQUEST_FILE, "w") as f:
+            f.write(f"{status}\n")
+    def append_log(self, message: str):
+        with open(self.folder / self.LOG_FILE, "a") as f:
+            f.write(message + "\n")
+            print(message)
+class Watcher:
+    def __init__(self, base_dir: str):
+        self.base_dir = Path(base_dir)
+        self.running_tasks: Dict[Path, Task] = {}
+        self.console = Console()
+    def run(self, interval: int = 60):
+        self.console.print("\n[bold blue]ROBIEMON Watcher started[/]")
+        self.console.print(f"[blue]Watching directory:[/] {self.base_dir}")
+        self.console.print(f"[blue]Polling interval:[/] {interval} seconds")
+        self.console.print("[yellow]Press Ctrl+C to stop[/]\n")
+        while True:
+            try:
+                self.check_folders()
+                # カウントダウン表示
+                for remaining in range(interval, 0, -1):
+                    print(f"\rNext check in {remaining:2d}s", end="", flush=True)
+                    time.sleep(1)
+                # カウントダウン終了後、同じ行を再利用
+                print("\rNext check in  0s", end="", flush=True)
+            except KeyboardInterrupt:
+                self.console.print("\n[yellow]Stopping watcher...[/]")
+                break
+            except Exception as e:
+                self.console.print(f"[red]ERROR:[/] {str(e)}")
+    def check_folders(self):
+        for folder in self.base_dir.rglob("*"):
+            if not folder.is_dir():
+                continue
+            request_file = folder / Task.REQUEST_FILE
+            if not request_file.exists():
+                continue
+            if folder in self.running_tasks:
+                continue
+            try:
+                with open(request_file, "r") as f:
+                    content = f.read()
+                    if not content.strip():
+                        continue
+                    # First line contains model/rotation specs
+                    options_line = content.split('\n')[0].strip()
+                    # Original status check from the entire file
+                    status = content.strip()
+            except:
+                continue
+            if Status.is_processing_state(status):
+                continue
+            # \rを含むログから改行するため空白行を挿入
+            print()
+            print()
+            print(f"detected: {folder}")
+            print(f"Request options: {options_line}")
+            task = Task(folder, options_line, on_complete=lambda f: self.running_tasks.pop(f, None))
+            self.running_tasks[folder] = task
+            task.run()  # 同期実行に変更
+BASE_DIR = os.getenv('BASE_DIR', 'data')
+def main():
+    parser = argparse.ArgumentParser(description="ROBIEMON WSI Processor Watcher")
+    parser.add_argument(
+        "--base-dir",
+        type=str,
+        default=BASE_DIR,
+        help="Base directory to watch for WSI processing requests"
+    )
+    parser.add_argument(
+        "--interval",
+        type=int,
+        default=60,
+        help="Polling interval in seconds (default: 60)"
+    )
+    args = parser.parse_args()
+    base_dir = Path(args.base_dir)
+    if not base_dir.exists():
+        print(f"Error: Base directory '{args.base_dir}' does not exist")
+        return
+    if not base_dir.is_dir():
+        print(f"Error: '{args.base_dir}' is not a directory")
+        return
+    watcher = Watcher(args.base_dir)
+    watcher.run(interval=args.interval)  # asyncio.runを削除
+# Start the watcher when this module is executed directly as a script
+if __name__ == "__main__":
+    main()

wsi_toolbox/wsi_files.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""
+WSI (Whole Slide Image) file handling classes.
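+Provides a unified interface for different WSI formats:
+- OpenSlide compatible formats (.svs, .tiff, etc.); also the fallback for any other extension
+- TIFF files read with tifffile (.ndpi by default; other TIFFs with engine='tifffile')
+- Standard images read with OpenCV (.jpg, .jpeg, .png, .tif)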
+"""
+import os
+import cv2
+import numpy as np
+from openslide import OpenSlide
+import tifffile
+import zarr
+class WSIFile:
+    """Base class for WSI file readers"""
+    def __init__(self, path):
+        pass
+    def get_mpp(self):
+        """Get microns per pixel"""
+        pass
+    def get_original_size(self):
+        """Get original image size (width, height)"""
+        pass
+    def read_region(self, xywh):
+        """Read region as RGB numpy array
+        Args:
+            xywh: tuple of (x, y, width, height)
+        Returns:
+            np.ndarray: RGB image (H, W, 3)
+        """
+        pass
+class TiffFile(WSIFile):
+    """TIFF file reader using tifffile library"""
+    def __init__(self, path):
+        self.tif = tifffile.TiffFile(path)
+        store = self.tif.pages[0].aszarr()
+        self.zarr_data = zarr.open(store, mode='r')  # 読み込み専用で開く
+    def get_original_size(self):
+        s = self.tif.pages[0].shape
+        return (s[1], s[0])
+    def get_mpp(self):
+        tags = self.tif.pages[0].tags
+        resolution_unit = tags.get('ResolutionUnit', None)
+        x_resolution = tags.get('XResolution', None)
+        assert resolution_unit
+        assert x_resolution
+        x_res_value = x_resolution.value
+        if isinstance(x_res_value, tuple) and len(x_res_value) == 2:
+            # 分数の形式（分子/分母）
+            numerator, denominator = x_res_value
+            resolution = numerator / denominator
+        else:
+            resolution = x_res_value
+        # 解像度単位の判定（2=インチ、3=センチメートル）
+        if resolution_unit.value == 2:  # インチ
+            # インチあたりのピクセル数からミクロンあたりのピクセル数へ変換
+            # 1インチ = 25400ミクロン
+            mpp = 25400.0 / resolution
+        elif resolution_unit.value == 3:  # センチメートル
+            # センチメートルあたりのピクセル数からミクロンあたりのピクセル数へ変換
+            # 1センチメートル = 10000ミクロン
+            mpp = 10000.0 / resolution
+        else:
+            mpp = 1.0 / resolution  # 単位不明の場合
+        return mpp
+    def read_region(self, xywh):
+        x, y, width, height = xywh
+        page = self.tif.pages[0]
+        full_width = page.shape[1]  # tifffileでは[height, width]の順
+        full_height = page.shape[0]
+        x = max(0, min(x, full_width - 1))
+        y = max(0, min(y, full_height - 1))
+        width = min(width, full_width - x)
+        height = min(height, full_height - y)
+        if page.is_tiled:
+            region = self.zarr_data[y:y+height, x:x+width]
+        else:
+            full_image = page.asarray()
+            region = full_image[y:y+height, x:x+width]
+        # カラーモデルの処理
+        if region.ndim == 2:  # グレースケール
+            region = np.stack([region, region, region], axis=-1)
+        elif region.shape[2] == 4:  # RGBA
+            region = region[:, :, :3]  # RGBのみ取得
+        return region
+class OpenSlideFile(WSIFile):
+    """OpenSlide compatible file reader"""
+    def __init__(self, path):
+        self.wsi = OpenSlide(path)
+        self.prop = dict(self.wsi.properties)
+    def get_mpp(self):
+        return float(self.prop['openslide.mpp-x'])
+    def get_original_size(self):
+        dim = self.wsi.level_dimensions[0]
+        return (dim[0], dim[1])
+    def read_region(self, xywh):
+        # self.wsi.read_region((0, row*T), target_level, (width, T))
+        # self.wsi.read_region((x, y), target_level, (w, h))
+        img = self.wsi.read_region((xywh[0], xywh[1]), 0, (xywh[2], xywh[3])).convert('RGB')
+        img = np.array(img.convert('RGB'))
+        return img
+class StandardImage(WSIFile):
+    """Standard image file reader (JPG, PNG, etc.)"""
+    def __init__(self, path, mpp):
+        self.image = cv2.imread(path)
+        self.image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)  # OpenCVはBGR形式で読み込むのでRGBに変換
+        self.mpp = mpp
+        assert self.mpp is not None, 'Specify mpp when using StandardImage'
+    def get_mpp(self):
+        return self.mpp
+    def get_original_size(self):
+        return self.image.shape[1], self.image.shape[0]  # width, height
+    def read_region(self, xywh):
+        x, y, w, h = xywh
+        return self.image[y:y+h, x:x+w]
+def create_wsi_file(image_path: str, engine: str = 'auto', **kwargs) -> WSIFile:
+    """
+    Factory function to create appropriate WSIFile instance
+    Args:
+        image_path: Path to WSI file
+        engine: Engine type ('auto', 'openslide', 'tifffile', 'standard')
+        **kwargs: Additional arguments (e.g., mpp for standard images)
+    Returns:
+        WSIFile: Appropriate WSIFile subclass instance
+    """
+    if engine == 'auto':
+        ext = os.path.splitext(image_path)[1].lower()
+        if ext == '.ndpi':
+            engine = 'tifffile'
+        elif ext in ['.jpg', '.jpeg', '.png', '.tif', 'tiff']:
+            engine = 'standard'
+        else:
+            engine = 'openslide'
+        print(f'using {engine} engine for {os.path.basename(image_path)}')
+    engine = engine.lower()
+    if engine == 'openslide':
+        return OpenSlideFile(image_path)
+    elif engine == 'tifffile':
+        return TiffFile(image_path)
+    elif engine == 'standard':
+        mpp = kwargs.get('mpp', None)
+        return StandardImage(image_path, mpp=mpp)
+    else:
+        raise ValueError(f'Invalid engine: {engine}')