wsi-toolbox 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wsi_toolbox/watcher.py ADDED
@@ -0,0 +1,261 @@
1
+ import os
2
+ import time
3
+ import argparse
4
+ import asyncio
5
+ from pathlib import Path
6
+ from typing import Dict, Set, Callable, Optional
7
+ from rich.console import Console
8
+ from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
9
+
10
+ from .utils import plot_umap
11
+ from . import commands
12
+
13
+ DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'uni')
14
+
15
class Status:
    """String markers that record the state of a processing request."""

    PROCESSING = "PROCESSING"
    DONE = "DONE"
    ERROR = "ERROR"

    @classmethod
    def is_processing_state(cls, status: str) -> bool:
        """Return True when *status* begins with one of the status markers.

        A match means the text is a state marker (running, finished or
        failed) rather than anything else.
        """
        known_markers = (cls.PROCESSING, cls.DONE, cls.ERROR)
        return any(status.startswith(marker) for marker in known_markers)
24
+
25
class Task:
    """One processing request for a folder of whole-slide image (WSI) files.

    The request file (``REQUEST_FILE``) inside the folder doubles as the
    status indicator: it is overwritten with a status marker while the task
    runs. Progress messages are appended to ``LOG_FILE`` in the same folder.
    """

    REQUEST_FILE = "_ROBIEMON.txt"
    LOG_FILE = "_ROBIEMON_LOG.txt"

    @staticmethod
    def parse_request_line(line: str) -> tuple[str, bool]:
        """Parse the request line for model and rotation specifications.

        The line is a comma-separated list of the form ``<model>[, rotate]``.

        Returns:
            ``(model_name, should_rotate)``. ``model_name`` falls back to
            ``DEFAULT_MODEL`` when the first field is blank;
            ``should_rotate`` is True only when the second field equals
            ``rotate`` (case-insensitive).
        """
        parts = [p.strip() for p in line.split(',')]
        # str.split always yields at least one element, so parts[0] is safe.
        model_name = parts[0] or DEFAULT_MODEL
        should_rotate = len(parts) > 1 and parts[1].lower() == 'rotate'
        return model_name, should_rotate

    def __init__(self, folder:Path, options_line:str, on_complete:Optional[Callable[[Path], None]] = None):
        """Collect the WSI files under *folder* and configure the commands module.

        Args:
            folder: Directory containing the request file and the slides.
            options_line: First line of the request file (model / rotation).
            on_complete: Optional callback invoked with *folder* once
                :meth:`run` has finished, whether it succeeded or failed.
        """
        self.folder = folder
        self.options_line = options_line
        self.model_name, self.should_rotate = self.parse_request_line(options_line)
        self.on_complete = on_complete
        # Recursive search; sorted so the processing order is deterministic.
        self.wsi_files = list(folder.glob("**/*.ndpi")) + list(folder.glob("**/*.svs"))
        self.wsi_files.sort()

        # NOTE: these calls change module-level defaults in `commands`.
        commands.set_default_progress('tqdm')
        commands.set_default_model(self.model_name)

    def write_banner(self):
        """Write the start-of-run banner (options and file list) to the log."""
        self.append_log("="*50)
        self.append_log(f"Processing folder: {self.folder}")
        self.append_log(f"Request options: {self.options_line}")
        self.append_log("Parsed options:")
        self.append_log(f" - Model: {self.model_name} (default: {DEFAULT_MODEL})")
        self.append_log(f" - Rotation: {'enabled' if self.should_rotate else 'disabled'}")
        self.append_log(f"Found {len(self.wsi_files)} WSI files:")
        for i, wsi_file in enumerate(self.wsi_files, 1):
            size_mb = wsi_file.stat().st_size / (1024 * 1024)
            self.append_log(f" {i}. {wsi_file.name} ({size_mb:.1f} MB)")
        self.append_log("="*50)

    def run(self):
        """Process every WSI file in the folder, then invoke ``on_complete``.

        The status file is set to PROCESSING at the start, DONE when every
        file succeeded, and ERROR as soon as anything fails (processing stops
        at the first failing file). Failures during setup (clearing the log,
        writing the banner) are also recorded as ERROR so the request never
        stays stuck at PROCESSING.
        """
        try:
            # Start from an empty log file.
            with open(self.folder / self.LOG_FILE, "w") as f:
                f.write("")

            self.set_status(Status.PROCESSING)
            self.write_banner()

            total = len(self.wsi_files)
            for index, wsi_file in enumerate(self.wsi_files, 1):
                try:
                    self._process_wsi(wsi_file, index, total)
                except Exception as e:
                    self._record_failure(f"Error processing {wsi_file}: {str(e)}")
                    break
            else:
                # Reached only when no file failed (including "no files").
                self.set_status(Status.DONE)
                self.append_log("All processing completed successfully")

        except Exception as e:
            self._record_failure(f"Error: {str(e)}")

        if self.on_complete:
            self.on_complete(self.folder)

    def _process_wsi(self, wsi_file: Path, index: int, total: int) -> None:
        """Run conversion, embedding, clustering and image export for one slide."""
        self.append_log(f"Processing [{index}/{total}]: {wsi_file.name}")

        hdf5_tmp_path = wsi_file.with_suffix('.h5.tmp')
        hdf5_file = wsi_file.with_suffix(".h5")

        # HDF5 conversion (skipped when the .h5 file already exists).
        if not hdf5_file.exists():
            self.append_log("Converting to HDF5...")
            commands.set_default_progress('tqdm')
            cmd = commands.Wsi2HDF5Command(rotate=self.should_rotate)
            # Write to a temporary name first so an interrupted conversion
            # never leaves a partial file under the final .h5 name.
            cmd(str(wsi_file), str(hdf5_tmp_path))
            os.replace(hdf5_tmp_path, hdf5_file)
            self.append_log("HDF5 conversion completed.")

        # Feature extraction. The command is always invoked here; whether it
        # skips existing features is decided inside PatchEmbeddingCommand
        # (NOTE(review): not visible from this module - confirm).
        self.append_log("Extracting features...")
        commands.set_default_device('cuda')
        emb_cmd = commands.PatchEmbeddingCommand()
        emb_cmd(str(hdf5_file))
        self.append_log("Feature extraction completed.")

        # Clustering with UMAP.
        self.append_log("Starting clustering ...")
        cluster_cmd = commands.ClusteringCommand(
            resolution=1.0,
            use_umap=True
        )
        cluster_cmd([hdf5_file])
        self.append_log("Clustering completed.")

        base = str(wsi_file.with_suffix(""))

        # UMAP plot (skipped when the image already exists).
        self.append_log("Starting UMAP generation...")
        umap_path = Path(f"{base}_umap.png")
        if not umap_path.exists():
            umap_embs = cluster_cmd.get_umap_embeddings()
            fig = plot_umap(umap_embs, cluster_cmd.total_clusters)
            fig.savefig(umap_path, bbox_inches='tight', pad_inches=0.5)
            self.append_log(f"UMAP plot completed. Saved to {os.path.basename(umap_path)}")
        else:
            self.append_log("UMAP plot already exists. Skipped.")

        # Thumbnail (skipped when the image already exists).
        self.append_log("Starting thumbnail generation...")
        thumb_path = Path(f"{base}_thumb.jpg")
        if not thumb_path.exists():
            preview_cmd = commands.PreviewClustersCommand(size=64)
            img = preview_cmd(str(hdf5_file), cluster_name='')
            img.save(thumb_path)
            self.append_log(f"Thumbnail generation completed. Saved to {thumb_path.name}")
        else:
            self.append_log("Thumbnail already exists. Skipped.")

        self.append_log("="*30)

    def _record_failure(self, message: str) -> None:
        """Best-effort: log *message* and flip the status file to ERROR."""
        try:
            self.append_log(message)
            self.set_status(Status.ERROR)
        except OSError as write_error:
            # The folder itself cannot be written; report on the console so
            # the original failure is not masked and on_complete still runs.
            print(f"Could not record failure for {self.folder}: {write_error}")

    def set_status(self, status: str):
        """Remember *status* and overwrite the request file with it."""
        self.status = status
        with open(self.folder / self.REQUEST_FILE, "w") as f:
            f.write(f"{status}\n")

    def append_log(self, message: str):
        """Append *message* to the folder's log file and echo it to stdout."""
        with open(self.folder / self.LOG_FILE, "a") as f:
            f.write(message + "\n")
        print(message)
160
+
161
class Watcher:
    """Poll a directory tree for request files and run a Task for each new one."""

    def __init__(self, base_dir: str):
        """Create a watcher rooted at *base_dir* (no scanning happens yet)."""
        self.base_dir = Path(base_dir)
        # Folders whose task has been started; the task's on_complete
        # callback removes the entry again.
        self.running_tasks: Dict[Path, Task] = {}
        self.console = Console()

    def run(self, interval: int = 60):
        """Scan for requests forever, pausing *interval* seconds between scans.

        Stops on Ctrl+C. Any other exception raised while scanning is printed
        and the loop continues after the normal pause.
        """
        self.console.print("\n[bold blue]ROBIEMON Watcher started[/]")
        self.console.print(f"[blue]Watching directory:[/] {self.base_dir}")
        self.console.print(f"[blue]Polling interval:[/] {interval} seconds")
        self.console.print("[yellow]Press Ctrl+C to stop[/]\n")

        while True:
            try:
                try:
                    self.check_folders()
                except Exception as e:
                    # Report and fall through to the countdown: retrying
                    # immediately would spin in a tight loop whenever the
                    # failure is persistent.
                    self.console.print(f"[red]ERROR:[/] {str(e)}")

                # Countdown display, redrawn in place on a single line.
                for remaining in range(interval, 0, -1):
                    print(f"\rNext check in {remaining:2d}s", end="", flush=True)
                    time.sleep(1)
                # Same field width as above so the final frame fully
                # overwrites the previous one.
                print(f"\rNext check in {0:2d}s", end="", flush=True)

            except KeyboardInterrupt:
                self.console.print("\n[yellow]Stopping watcher...[/]")
                break

    def check_folders(self):
        """Run a Task, synchronously, for every sub-folder holding a fresh request.

        A folder is skipped when it has no request file, is already listed in
        ``running_tasks``, its request file is empty or unreadable, or the
        file content starts with a status marker.
        """
        for folder in self.base_dir.rglob("*"):
            if not folder.is_dir():
                continue

            request_file = folder / Task.REQUEST_FILE
            if not request_file.exists():
                continue

            if folder in self.running_tasks:
                continue

            try:
                with open(request_file, "r") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError):
                # Unreadable request (vanished, locked, wrong encoding):
                # skip it for now and look again on the next poll.
                continue

            if not content.strip():
                continue

            # First line contains model/rotation specs.
            options_line = content.split('\n')[0].strip()

            # The status check looks at the whole (stripped) file content.
            status = content.strip()
            if Status.is_processing_state(status):
                continue

            # The countdown line ends with "\r"-style output and no newline,
            # so emit blank lines before printing anything else.
            print()
            print()
            print(f"detected: {folder}")
            print(f"Request options: {options_line}")

            task = Task(folder, options_line, on_complete=lambda f: self.running_tasks.pop(f, None))
            self.running_tasks[folder] = task
            task.run()  # executed synchronously
229
+
230
+ BASE_DIR = os.getenv('BASE_DIR', 'data')
231
+
232
def main():
    """Command-line entry point: validate the base directory, then start watching.

    Prints an error message and returns without starting the watcher when the
    base directory is missing or is not a directory.
    """
    parser = argparse.ArgumentParser(description="ROBIEMON WSI Processor Watcher")
    parser.add_argument(
        "--base-dir",
        type=str,
        default=BASE_DIR,
        help="Base directory to watch for WSI processing requests",
    )
    parser.add_argument(
        "--interval",
        type=int,
        default=60,
        help="Polling interval in seconds (default: 60)",
    )
    args = parser.parse_args()

    target = Path(args.base_dir)
    problem = None
    if not target.exists():
        problem = f"Error: Base directory '{args.base_dir}' does not exist"
    elif not target.is_dir():
        problem = f"Error: '{args.base_dir}' is not a directory"

    if problem is not None:
        print(problem)
        return

    # The watcher loop is synchronous; no asyncio event loop is involved.
    Watcher(args.base_dir).run(interval=args.interval)
259
+
260
+ if __name__ == "__main__":
261
+ main()
@@ -0,0 +1,187 @@
1
+ """
2
+ WSI (Whole Slide Image) file handling classes.
3
+
4
+ Provides unified interface for different WSI formats:
5
+ - OpenSlide compatible formats (.svs, .tiff, etc.)
6
+ - TIFF files (.ndpi, .tif)
7
+ - Standard images (.jpg, .png)
8
+ """
9
+
10
+ import os
11
+ import cv2
12
+ import numpy as np
13
+ from openslide import OpenSlide
14
+ import tifffile
15
+ import zarr
16
+
17
+
18
class WSIFile:
    """Base class for WSI file readers.

    Every method here is a placeholder that does nothing and returns
    ``None``; subclasses are expected to override them.
    """

    def __init__(self, path):
        """Accept *path* without opening anything."""

    def get_mpp(self):
        """Get microns per pixel (placeholder: returns ``None``)."""
        return None

    def get_original_size(self):
        """Get original image size (width, height) (placeholder: returns ``None``)."""
        return None

    def read_region(self, xywh):
        """Read region as RGB numpy array (placeholder: returns ``None``).

        Args:
            xywh: tuple of (x, y, width, height)

        Returns:
            np.ndarray: RGB image (H, W, 3) in concrete readers.
        """
        return None
42
+
43
+
44
class TiffFile(WSIFile):
    """TIFF file reader using tifffile library"""

    def __init__(self, path):
        """Open *path* with tifffile and expose its first page as a zarr array."""
        self.tif = tifffile.TiffFile(path)

        first_page_store = self.tif.pages[0].aszarr()
        # Opened read-only.
        self.zarr_data = zarr.open(first_page_store, mode='r')

    def get_original_size(self):
        """Return ``(width, height)`` of the first page."""
        page_shape = self.tif.pages[0].shape  # tifffile order: (height, width, ...)
        return (page_shape[1], page_shape[0])

    def get_mpp(self):
        """Return microns per pixel derived from the first page's resolution tags.

        Reads the ``XResolution`` and ``ResolutionUnit`` tags; both must be
        present (enforced with ``assert``).
        """
        first_page_tags = self.tif.pages[0].tags
        unit_tag = first_page_tags.get('ResolutionUnit', None)
        x_res_tag = first_page_tags.get('XResolution', None)

        assert unit_tag
        assert x_res_tag

        raw_resolution = x_res_tag.value
        if isinstance(raw_resolution, tuple) and len(raw_resolution) == 2:
            # Stored as a fraction (numerator, denominator).
            numerator, denominator = raw_resolution
            pixels_per_unit = numerator / denominator
        else:
            pixels_per_unit = raw_resolution

        # ResolutionUnit: 2 = inch, 3 = centimetre.
        unit_code = unit_tag.value
        if unit_code == 2:
            microns_per_unit = 25400.0  # 1 inch = 25400 microns
        elif unit_code == 3:
            microns_per_unit = 10000.0  # 1 centimetre = 10000 microns
        else:
            microns_per_unit = 1.0  # unknown unit: plain reciprocal

        return microns_per_unit / pixels_per_unit

    def read_region(self, xywh):
        """Read a region of the first page as a 3-channel array.

        The requested rectangle is clamped to the page bounds, so the result
        can be smaller than the requested width/height.

        Args:
            xywh: tuple of (x, y, width, height)
        """
        left, top, width, height = xywh
        page = self.tif.pages[0]

        page_width = page.shape[1]  # tifffile order is (height, width)
        page_height = page.shape[0]

        left = max(0, min(left, page_width - 1))
        top = max(0, min(top, page_height - 1))
        width = min(width, page_width - left)
        height = min(height, page_height - top)

        row_span = slice(top, top + height)
        col_span = slice(left, left + width)

        if page.is_tiled:
            region = self.zarr_data[row_span, col_span]
        else:
            # Non-tiled pages are decoded in full, then cropped.
            region = page.asarray()[row_span, col_span]

        # Normalise the colour layout to three channels.
        if region.ndim == 2:
            # Grayscale: replicate the single channel.
            return np.stack([region] * 3, axis=-1)
        if region.shape[2] == 4:
            # RGBA: drop the alpha channel.
            return region[:, :, :3]
        return region
111
+
112
+
113
class OpenSlideFile(WSIFile):
    """OpenSlide compatible file reader"""

    def __init__(self, path):
        """Open *path* with OpenSlide and snapshot its properties as a dict."""
        self.wsi = OpenSlide(path)
        self.prop = dict(self.wsi.properties)

    def get_mpp(self):
        """Return microns per pixel from the ``openslide.mpp-x`` property.

        Raises:
            KeyError: if the slide does not provide ``openslide.mpp-x``.
        """
        return float(self.prop['openslide.mpp-x'])

    def get_original_size(self):
        """Return ``(width, height)`` of level 0."""
        dim = self.wsi.level_dimensions[0]
        return (dim[0], dim[1])

    def read_region(self, xywh):
        """Read a level-0 region as an RGB numpy array.

        Args:
            xywh: tuple of (x, y, width, height)

        Returns:
            np.ndarray: RGB image (H, W, 3)
        """
        location = (xywh[0], xywh[1])
        size = (xywh[2], xywh[3])
        region = self.wsi.read_region(location, 0, size)
        # A single conversion is enough; converting an RGB image to RGB
        # again only makes another full copy of the region.
        return np.array(region.convert('RGB'))
133
+
134
+
135
class StandardImage(WSIFile):
    """Standard image file reader (JPG, PNG, etc.)"""

    def __init__(self, path, mpp):
        """Load *path* fully into memory as RGB and remember *mpp*.

        Args:
            path: Image file to read with OpenCV.
            mpp: Microns per pixel; must not be None (checked with ``assert``).
        """
        # OpenCV reads images as BGR, so convert to RGB right away.
        self.image = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
        self.mpp = mpp
        assert self.mpp is not None, 'Specify mpp when using StandardImage'

    def get_mpp(self):
        """Return the microns-per-pixel value given at construction."""
        return self.mpp

    def get_original_size(self):
        """Return ``(width, height)`` of the loaded image."""
        height, width = self.image.shape[0], self.image.shape[1]
        return width, height

    def read_region(self, xywh):
        """Return the ``(x, y, width, height)`` crop of the in-memory image."""
        left, top, width, height = xywh
        rows = slice(top, top + height)
        cols = slice(left, left + width)
        return self.image[rows, cols]
153
+
154
+
155
def create_wsi_file(image_path: str, engine: str = 'auto', **kwargs) -> WSIFile:
    """
    Factory function to create appropriate WSIFile instance

    Args:
        image_path: Path to WSI file
        engine: Engine type ('auto', 'openslide', 'tifffile', 'standard'),
            matched case-insensitively. With 'auto' the engine is chosen
            from the file extension: '.ndpi' -> tifffile; '.jpg', '.jpeg',
            '.png', '.tif' -> standard; anything else -> openslide.
        **kwargs: Additional arguments (e.g., mpp for standard images)

    Returns:
        WSIFile: Appropriate WSIFile subclass instance

    Raises:
        ValueError: if *engine* is not one of the supported names.
    """
    # Normalise before the 'auto' check so 'AUTO' / 'Auto' behave like the
    # other engine names, which were already case-insensitive.
    engine = engine.lower()

    if engine == 'auto':
        ext = os.path.splitext(image_path)[1].lower()
        if ext == '.ndpi':
            engine = 'tifffile'
        # NOTE(review): this list used to contain 'tiff' without the leading
        # dot, which can never equal a splitext() extension. The dead entry
        # is dropped, so '.tiff' keeps going to OpenSlide exactly as before.
        # Confirm whether '.tiff' was meant for the standard reader instead.
        elif ext in ('.jpg', '.jpeg', '.png', '.tif'):
            engine = 'standard'
        else:
            engine = 'openslide'
        print(f'using {engine} engine for {os.path.basename(image_path)}')

    if engine == 'openslide':
        return OpenSlideFile(image_path)
    if engine == 'tifffile':
        return TiffFile(image_path)
    if engine == 'standard':
        return StandardImage(image_path, mpp=kwargs.get('mpp', None))
    raise ValueError(f'Invalid engine: {engine}')