PyPI - wsi-toolbox - Versions diffs - 0.1.0__py3-none-any.whl - Mend

wsi-toolbox 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

wsi_toolbox/__init__.py +119 -0
wsi_toolbox/app.py +753 -0
wsi_toolbox/cli.py +485 -0
wsi_toolbox/commands/__init__.py +92 -0
wsi_toolbox/commands/clustering.py +214 -0
wsi_toolbox/commands/dzi_export.py +202 -0
wsi_toolbox/commands/patch_embedding.py +199 -0
wsi_toolbox/commands/preview.py +335 -0
wsi_toolbox/commands/wsi.py +196 -0
wsi_toolbox/exp.py +466 -0
wsi_toolbox/models.py +38 -0
wsi_toolbox/utils/__init__.py +153 -0
wsi_toolbox/utils/analysis.py +127 -0
wsi_toolbox/utils/cli.py +25 -0
wsi_toolbox/utils/helpers.py +57 -0
wsi_toolbox/utils/progress.py +206 -0
wsi_toolbox/utils/seed.py +21 -0
wsi_toolbox/utils/st.py +53 -0
wsi_toolbox/watcher.py +261 -0
wsi_toolbox/wsi_files.py +187 -0
wsi_toolbox-0.1.0.dist-info/METADATA +269 -0
wsi_toolbox-0.1.0.dist-info/RECORD +25 -0
wsi_toolbox-0.1.0.dist-info/WHEEL +4 -0
wsi_toolbox-0.1.0.dist-info/entry_points.txt +2 -0
wsi_toolbox-0.1.0.dist-info/licenses/LICENSE +21 -0

wsi_toolbox/utils/analysis.py ADDED Viewed

@@ -0,0 +1,127 @@
+import multiprocessing
+import numpy as np
+from sklearn.decomposition import PCA
+from sklearn.neighbors import NearestNeighbors
+import networkx as nx
+import igraph as ig
+import leidenalg as la
+from joblib import Parallel, delayed
+from .progress import tqdm_or_st
+def find_optimal_components(features, threshold=0.95):
+    pca = PCA()
+    pca.fit(features)
+    explained_variance = pca.explained_variance_ratio_
+    # 累積寄与率が95%を超える次元数を選択する例
+    cumulative_variance = np.cumsum(explained_variance)
+    optimal_n = np.argmax(cumulative_variance >= threshold) + 1
+    return min(optimal_n, len(features) - 1)
+def process_edges_batch(batch_indices, all_indices, h, use_umap_embs, pca=None):
+    """Process a batch of nodes and their edges"""
+    edges = []
+    weights = []
+    for i in batch_indices:
+        for j in all_indices[i]:
+            if i == j:  # skip self loop
+                continue
+            if use_umap_embs:
+                distance = np.linalg.norm(h[i] - h[j])
+                weight = np.exp(-distance)
+            else:
+                explained_variance_ratio = pca.explained_variance_ratio_
+                weighted_diff = (h[i] - h[j]) * np.sqrt(explained_variance_ratio[:len(h[i])])
+                distance = np.linalg.norm(weighted_diff)
+                weight = np.exp(-distance / distance.mean())
+            edges.append((i, j))
+            weights.append(weight)
+    return edges, weights
+def leiden_cluster(features, umap_emb_func=None, resolution=1.0, n_jobs=-1, progress='tqdm'):
+    if n_jobs < 0:
+        n_jobs = multiprocessing.cpu_count()
+    use_umap_embs = umap_emb_func is not None
+    n_samples = features.shape[0]
+    progress_count = 5 # (UMAP), PCA, KNN, edges, leiden, Finalize
+    if use_umap_embs:
+        progress_count += 1
+    tq = tqdm_or_st(total=progress_count, backend=progress)
+    # 1. UMAP cluster if needed
+    if use_umap_embs:
+        tq.set_description(f'UMAP projection...')
+        umap_embeddings = umap_emb_func()
+        tq.update(1)
+    else:
+        umap_embeddings = None
+    # 2. pre-PCA
+    tq.set_description(f'Processing PCA...')
+    n_components = find_optimal_components(features)
+    pca = PCA(n_components)
+    target_features = pca.fit_transform(features)
+    tq.update(1)
+    # 3. KNN
+    tq.set_description(f'Processing KNN...')
+    k = int(np.sqrt(len(target_features)))
+    nn = NearestNeighbors(n_neighbors=k).fit(target_features)
+    distances, indices = nn.kneighbors(target_features)
+    tq.update(1)
+    # 4. Build graph
+    tq.set_description(f'Processing edges...')
+    G = nx.Graph()
+    G.add_nodes_from(range(n_samples))
+    h = umap_embeddings if use_umap_embs else target_features
+    batch_size = max(1, n_samples // n_jobs)
+    batches = [list(range(i, min(i + batch_size, n_samples)))
+               for i in range(0, n_samples, batch_size)]
+    results = Parallel(n_jobs=n_jobs)([
+            delayed(process_edges_batch)(batch, indices, h, use_umap_embs, pca)
+            for batch in batches
+        ]
+    )
+    for batch_edges, batch_weights in results:
+        for (i, j), weight in zip(batch_edges, batch_weights):
+            G.add_edge(i, j, weight=weight)
+    tq.update(1)
+    # 5. Leiden clustering
+    tq.set_description(f'Leiden clustering...')
+    edges = list(G.edges())
+    weights = [G[u][v]['weight'] for u, v in edges]
+    ig_graph = ig.Graph(n=n_samples, edges=edges, edge_attrs={'weight': weights})
+    partition = la.find_partition(
+        ig_graph,
+        la.RBConfigurationVertexPartition,
+        weights='weight',
+        resolution_parameter=resolution, # maybe most adaptive
+        # resolution_parameter=1.0, # maybe most adaptive
+        # resolution_parameter=0.5, # more coarse cluster
+    )
+    tq.update(1)
+    # 6. Finalize
+    tq.set_description(f'Finalize...')
+    clusters = np.full(n_samples, -1)  # Initialize all as noise
+    for i, community in enumerate(partition):
+        for node in community:
+            clusters[node] = i
+    tq.update(1)
+    tq.close()
+    return clusters

wsi_toolbox/utils/cli.py ADDED Viewed

@@ -0,0 +1,25 @@
+import os
+import sys
+import re
+from string import capwords
+import inspect
+import asyncio
+from typing import Callable, Type
+import argparse
+from pydantic import BaseModel, Field
+from pydantic_autocli import AutoCLI
+from .seed import fix_global_seed, get_global_seed
+# Argument model shared by the ML commands; it only carries the random seed.
+class BaseMLArgs(BaseModel):
+    # The default is the value get_global_seed() returns when this class body
+    # is evaluated (i.e. at import time), not at instantiation time.
+    seed: int = get_global_seed()
+# AutoCLI base class that applies the requested seed before the parent's
+# common pre-hook runs.
+class BaseMLCLI(AutoCLI):
+    # Common argument model; inherits the `seed` field from BaseMLArgs.
+    class CommonArgs(BaseMLArgs):
+        pass
+    def _pre_common(self, a:BaseMLArgs):
+        # Seed first, then delegate to the parent hook.
+        fix_global_seed(a.seed)
+        super()._pre_common(a)

wsi_toolbox/utils/helpers.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""
+Helper utility functions for WSI processing
+"""
+import numpy as np
+import h5py
+def is_white_patch(patch, rgb_std_threshold=7.0, white_ratio=0.7):
+    """
+    Check if a patch is mostly white/blank
+    Args:
+        patch: RGB patch (H, W, 3)
+        rgb_std_threshold: Threshold for RGB standard deviation
+        white_ratio: Ratio threshold for white pixels
+    Returns:
+        bool: True if patch is considered white/blank
+    """
+    # white: RGB std < 7.0
+    rgb_std_pixels = np.std(patch, axis=2) < rgb_std_threshold
+    white_pixels = np.sum(rgb_std_pixels)
+    total_pixels = patch.shape[0] * patch.shape[1]
+    white_ratio_calculated = white_pixels / total_pixels
+    # print('whi' if white_ratio_calculated > white_ratio else 'use',
+    #       'std{:.3f}'.format(np.sum(rgb_std_pixels)/total_pixels)
+    #      )
+    return white_ratio_calculated > white_ratio
+def cosine_distance(x, y):
+    """
+    Calculate cosine distance with exponential weighting
+    Args:
+        x: First vector
+        y: Second vector
+    Returns:
+        tuple: (distance, weight)
+    """
+    distance = np.linalg.norm(x - y)
+    weight = np.exp(-distance / distance.mean())
+    return distance, weight
+def safe_del(hdf_file, key_path):
+    """
+    Safely delete a dataset from HDF5 file if it exists
+    Args:
+        hdf_file: h5py.File object
+        key_path: Dataset path to delete
+    """
+    if key_path in hdf_file:
+        del hdf_file[key_path]

wsi_toolbox/utils/progress.py ADDED Viewed

@@ -0,0 +1,206 @@
+import time
+from typing import Iterable, TypeVar, Optional, Union, Any
+T = TypeVar('T')
+class StreamlitProgress:
+    """tqdmと同じインターフェースを持つStreamlitのプログレスバー"""
+    def __init__(self, iterable: Optional[Iterable[T]] = None, total: Optional[int] = None,
+                 desc: str = "", **kwargs):
+        self.iterable = iterable
+        self.total = total if total is not None else (len(iterable) if iterable is not None and hasattr(iterable, "__len__") else None)
+        self.desc = desc
+        self.n = 0
+        self.kwargs = kwargs
+        try:
+            import streamlit as st
+            # 説明テキスト用のコンテナ
+            self.text_container = st.empty()
+            if desc:
+                self.text_container.text(desc)
+            # プログレスバー
+            self.progress_bar = st.progress(0)
+            # 後置テキスト用のコンテナ
+            self.postfix_container = st.empty()
+        except ImportError:
+            raise ImportError("streamlitがインストールされていません。")
+    def update(self, n: int = 1) -> None:
+        """進捗を更新する"""
+        self.n += n
+        if self.total:
+            self.progress_bar.progress(min(self.n / self.total, 1.0))
+    def set_description(self, desc: str = None, refresh: bool = True) -> None:
+        """説明テキストを更新する"""
+        if desc is not None:
+            self.desc = desc
+            # self.text_container.text(desc)
+            self.text_container.markdown('<p style="font-size:14px; color:gray;">' + desc +'</p>', unsafe_allow_html=True)
+    def set_postfix(self, ordered_dict=None, **kwargs) -> None:
+        """後置テキストを設定する"""
+        # ordered_dictとkwargsを組み合わせる
+        postfix_dict = {}
+        if ordered_dict:
+            postfix_dict.update(ordered_dict)
+        if kwargs:
+            postfix_dict.update(kwargs)
+        if postfix_dict:
+            # 辞書を文字列に変換して表示
+            postfix_str = ', '.join(f'{k}={v}' for k, v in postfix_dict.items())
+            self.postfix_container.text(f"状態: {postfix_str}")
+    def close(self) -> None:
+        """プログレスバーを完了状態にする"""
+        if self.total:
+            self.progress_bar.progress(1.0)
+        self.text_container.empty()
+    def refresh(self):
+        """ 不要なので何もしない """
+        pass
+    def __iter__(self):
+        """イテレータとして使用できるようにする"""
+        if self.iterable is None:
+            raise ValueError("このプログレスバーはイテレータとして使用できません")
+        for obj in self.iterable:
+            yield obj
+            self.update(1)
+        self.close()
+    def __enter__(self):
+        """コンテキストマネージャとして使用できるようにする"""
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """コンテキスト終了時に呼ばれる"""
+        self.close()
+def tqdm_or_st(iterable: Optional[Iterable[T]] = None,
+              backend: str = 'tqdm',
+              **kwargs) -> Union['tqdm', StreamlitProgress]:
+    """
+    指定されたバックエンドのプログレスバーを返す
+    Args:
+        iterable: 進捗を表示するイテレータ
+        backend: バックエンド ("tqdm", "streamlit")
+        **kwargs: tqdmやStreamlitProgressに渡す引数
+    Returns:
+        tqdm または StreamlitProgress オブジェクト
+    """
+    # if backend == "auto":
+    #     try:
+    #         import streamlit as st
+    #         if st._is_running_with_streamlit:
+    #             backend = "streamlit"
+    #         else:
+    #             backend = "tqdm"
+    #     except (ImportError, AttributeError):
+    #         backend = "tqdm"
+    assert backend in ['tqdm', 'streamlit']
+    if backend == "tqdm":
+        try:
+            from tqdm import tqdm
+            return tqdm(iterable, **kwargs)
+        except ImportError:
+            print("tqdmが見つからないため、Streamlitバックエンドを試行します...")
+            backend = "streamlit"
+    # Streamlitを使用
+    if backend == "streamlit":
+        try:
+            return StreamlitProgress(iterable, **kwargs)
+        except ImportError:
+            print("Streamlitが見つかりません。プログレスバーなしで実行します。")
+            # フォールバック: 何もしないダミープログレスバー
+            try:
+                from tqdm import tqdm
+                return tqdm(iterable, disable=True, **kwargs)
+            except ImportError:
+                # tqdmもないので、単なるイテレータを返す
+                class DummyTqdm:
+                    def __init__(self, iterable=None, **kwargs):
+                        self.iterable = iterable
+                    def update(self, n=1): pass
+                    def close(self): pass
+                    def set_description(self, desc=None, refresh=True): pass
+                    def set_postfix(self, ordered_dict=None, **kwargs): pass
+                    def __iter__(self):
+                        if self.iterable is None: raise ValueError("イテレータがありません")
+                        for x in self.iterable: yield x
+                    def __enter__(self): return self
+                    def __exit__(self, *args, **kwargs): pass
+                return DummyTqdm(iterable, **kwargs)
+# 基本的な使用例
+def basic_example():
+    """基本的な使用例"""
+    items = list(range(10))
+    # tqdmと同じ使い方
+    for item in tqdm_or_st(items, desc="基本的な例", backend="tqdm"):
+        time.sleep(0.1)
+        print(f"処理中: {item}")
+# Streamlitの使用例
+def streamlit_example():
+    """Streamlitでの使用例 (Streamlitアプリ内で実行する必要があります)"""
+    import streamlit as st
+    st.title("処理の進捗表示")
+    items = list(range(10))
+    results = []
+    # 自動的にStreamlitを検出
+    for item in tqdm_or_st(items, desc="処理中...", backend="auto"):
+        time.sleep(0.2)
+        results.append(item * 2)
+    st.write("結果:", results)
+# コンテキストマネージャとしての使用例
+def context_manager_example():
+    """コンテキストマネージャとしての使用例"""
+    total_steps = 5
+    # with文で使用
+    with tqdm_or_st(total=total_steps, desc="手動更新", backend="tqdm") as pbar:
+        for i in range(total_steps):
+            time.sleep(0.2)
+            # 説明を更新
+            if i == 2:
+                pbar.set_description(f"ステップ {i+1}/{total_steps}")
+            # 追加情報を表示
+            pbar.set_postfix(progress=f"{(i+1)/total_steps:.0%}")
+            # 進捗を更新
+            pbar.update(1)
+# テスト用のメイン関数
+def main():
+    print("基本的な使用例:")
+    basic_example()
+    print("\nコンテキストマネージャとしての使用例:")
+    context_manager_example()
+    print("\nStreamlitの例はStreamlitアプリ内で実行してください")
+    # streamlit_example()  # Streamlitアプリ内でのみ実行可能
+if __name__ == "__main__":
+    main()

wsi_toolbox/utils/seed.py ADDED Viewed

@@ -0,0 +1,21 @@
+import random
+import numpy as np
+import torch
+# Module-level seed; starts at 42 and is overwritten by fix_global_seed()
+# with whatever seed it applied last.
+__GLOBAL_SEED = 42
+def get_global_seed():
+    """Return the current value of the module-level seed."""
+    return __GLOBAL_SEED
+def fix_global_seed(seed=None):
+    if seed is None:
+        seed = get_global_seed()
+    global __GLOBAL_SEED
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.random.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.use_deterministic_algorithms = True
+    __GLOBAL_SEED = seed

wsi_toolbox/utils/st.py ADDED Viewed

@@ -0,0 +1,53 @@
+import streamlit as st
+from contextlib import contextmanager
+# CSS injected by st_horizontal(): any stVerticalBlock that directly contains
+# an element with the `horizontal-marker` class is laid out as a wrapping
+# flex row, and the element container holding this <style> tag is hidden.
+HORIZONTAL_STYLE = """
+<style class="hide-element">
+    /* Hides the style container and removes the extra spacing */
+    .element-container:has(.hide-element) {
+        display: none;
+    }
+    /*
+        The selector for >.element-container is necessary to avoid selecting the whole
+        body of the streamlit app, which is also a stVerticalBlock.
+    */
+    div[data-testid="stVerticalBlock"]:has(> .element-container .horizontal-marker) {
+        display: flex;
+        flex-direction: row !important;
+        flex-wrap: wrap;
+        gap: 0.5rem;
+        align-items: baseline;
+    }
+    /* Buttons and their parent container all have a width of 704px, which we need to override */
+    div[data-testid="stVerticalBlock"]:has(> .element-container .horizontal-marker) div {
+        width: max-content !important;
+    }
+    /* Selectbox container */
+    div[data-testid="stVerticalBlock"]:has(> .element-container .horizontal-marker) div[data-testid="stSelectbox"] {
+        display: flex !important;
+        flex-direction: row !important;
+        align-items: center !important;
+        gap: 0.5rem !important;
+    }
+    /* Selectbox label */
+    div[data-testid="stVerticalBlock"]:has(> .element-container .horizontal-marker) div[data-testid="stWidgetLabel"] {
+        margin-bottom: 0 !important;
+        padding-right: 0.5rem !important;
+    }
+    /* Selectbox input container */
+    div[data-testid="stVerticalBlock"]:has(> .element-container .horizontal-marker) div[data-baseweb="select"] {
+        min-width: 120px !important;
+    }
+    /* Selectbox dropdown */
+    div[data-testid="stVerticalBlock"]:has(> .element-container .horizontal-marker) div[role="listbox"] {
+        min-width: 120px !important;
+    }
+</style>
+"""
+@contextmanager
+def st_horizontal():
+    st.markdown(HORIZONTAL_STYLE, unsafe_allow_html=True)
+    with st.container():
+        st.markdown('<span class="hide-element horizontal-marker"></span>', unsafe_allow_html=True)
+        yield