wsi-toolbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wsi_toolbox/app.py ADDED
@@ -0,0 +1,874 @@
1
+ import os
2
+ import re
3
+ import sys
4
+ import warnings
5
+ from datetime import datetime
6
+ from pathlib import Path as P
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ import h5py
10
+ import numpy as np
11
+ import pandas as pd
12
+ import torch
13
+ from PIL import Image
14
+ from pydantic import BaseModel
15
+ from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
16
+
17
+ torch.classes.__path__ = []
18
+ import streamlit as st
19
+
20
+ sys.path.append(str(P(__file__).parent))
21
+ __package__ = "wsi_toolbox"
22
+
23
+ from . import commands
24
+ from .utils.plot import plot_scatter_2d
25
+ from .utils.st import st_horizontal
26
+
27
# Silence FutureWarnings that are raised from inside third-party libraries.
# 1) sklearn 1.6+ internal deprecation warning
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message=".*force_all_finite.*",
)
# 2) torch.load warning raised from inside the timm library
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message="You are using `torch.load` with `weights_only=False`",
)

# Defaults for the command layer: "streamlit" progress reporting, "cuda" device.
commands.set_default_progress("streamlit")
commands.set_default_device("cuda")

# Raise Pillow's maximum accepted pixel count for opened images.
Image.MAX_IMAGE_PIXELS = 3_500_000_000
39
+
40
# Settings that can be overridden through environment variables.
BASE_DIR = os.environ.get("BASE_DIR", "data")
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "uni")


# Model preset name -> label shown in the UI (insertion order is the UI order).
MODEL_LABELS = dict(
    uni="UNI",
    gigapath="Prov-Gigapath",
    virchow2="Virchow2",
)
# Private reverse lookup: UI label -> model preset name.
_MODEL_NAMES_BY_LABEL = {label: name for name, label in MODEL_LABELS.items()}
MODEL_NAMES = [*MODEL_LABELS]


# Global constants
BATCH_SIZE = 256  # passed as batch_size to PatchEmbeddingCommand
PATCH_SIZE = 256  # passed as patch_size to Wsi2HDF5Command
THUMBNAIL_SIZE = 64  # passed as size to PreviewClustersCommand
# Bounds, default and step of the clustering-resolution slider.
MIN_CLUSTER_RESOLUTION = 0.0
MAX_CLUSTER_RESOLUTION = 3.0
DEFAULT_CLUSTER_RESOLUTION = 1.0
CLUSTER_RESOLUTION_STEP = 0.1
61
+
62
+
63
+ # File type definitions
64
class FileType:
    """String constants naming what a file entry (or a selection of entries) is."""

    # Kinds assigned to individual directory entries.
    DIRECTORY = "directory"
    WSI = "wsi"
    HDF5 = "hdf5"
    IMAGE = "image"
    OTHER = "other"

    # Selection-level results: nothing selected / several different kinds selected.
    EMPTY = "empty"
    MIX = "mix"
72
+
73
+
74
# Display metadata per file type: UI label, icon and (where applicable) the
# lower-case extensions that identify the type.
FILE_TYPE_CONFIG = {
    # Entry for the "empty" type is kept disabled:
    # FileType.EMPTY: {
    #     'label': '空',
    #     'icon': '🔳',
    # },
    FileType.DIRECTORY: {
        "label": "フォルダ",
        "icon": "📁",
    },
    FileType.WSI: {
        "label": "WSI",
        "icon": "🔬",
        "extensions": {".ndpi", ".svs"},
    },
    FileType.HDF5: {
        "label": "HDF5",
        "icon": "📊",
        "extensions": {".h5"},
    },
    FileType.IMAGE: {
        "label": "画像",
        "icon": "🖼️",
        "extensions": {
            ".bmp",
            ".gif",
            ".icns",
            ".ico",
            ".jpg",
            ".jpeg",
            ".png",
            ".tif",
            ".tiff",
        },
    },
    # Fallback for anything that matches none of the extension sets above.
    FileType.OTHER: {
        "label": "その他",
        "icon": "📄",
    },
}
103
+
104
+
105
def get_file_type(path: P) -> str:
    """Determine the file type of ``path``.

    Directories are reported as ``FileType.DIRECTORY``. Otherwise the
    lower-cased suffix is looked up in the ``extensions`` sets of
    ``FILE_TYPE_CONFIG``; the first matching type wins and
    ``FileType.OTHER`` is returned when nothing matches.
    """
    if path.is_dir():
        return FileType.DIRECTORY

    suffix = path.suffix.lower()
    candidates = (
        kind
        for kind, spec in FILE_TYPE_CONFIG.items()
        if "extensions" in spec and suffix in spec["extensions"]
    )
    return next(candidates, FileType.OTHER)
116
+
117
+
118
def get_file_type_display(type_key: str) -> str:
    """Return "<icon> <label>" for ``type_key``, using the OTHER entry for unknown keys."""
    entry = FILE_TYPE_CONFIG.get(type_key)
    if entry is None:
        entry = FILE_TYPE_CONFIG[FileType.OTHER]
    icon, label = entry["icon"], entry["label"]
    return f"{icon} {label}"
122
+
123
+
124
def add_beforeunload_js():
    """Inject a script that asks for confirmation before leaving a locked page.

    The handler only prompts while the ``streamlit_locked`` localStorage
    flag is the string ``'true'``.
    """
    snippet = """
    <script>
    window.onbeforeunload = function(e) {
        if (window.localStorage.getItem('streamlit_locked') === 'true') {
            e.preventDefault();
            e.returnValue = "処理中にページを離れると処理がリセットされます。ページを離れますか?";
            return e.returnValue;
        }
    };
    </script>
    """
    # Zero height: the component exists only to run the script.
    st.components.v1.html(snippet, height=0)
137
+
138
+
139
def set_locked_state(is_locked):
    """Record the lock flag in the session state and mirror it to localStorage.

    The browser-side copy is stored under the ``streamlit_locked`` key as
    the lower-cased string form of ``is_locked``.
    """
    print("locked", is_locked)
    st.session_state.locked = is_locked

    flag_text = str(is_locked).lower()
    snippet = f"""
    <script>
    window.localStorage.setItem('streamlit_locked', '{flag_text}');
    </script>
    """
    st.components.v1.html(snippet, height=0)
148
+
149
+
150
def lock():
    """Mark the UI as locked."""
    set_locked_state(is_locked=True)


def unlock():
    """Release the UI lock and drop all ``st.cache_data`` entries."""
    set_locked_state(is_locked=False)
    # Clear the cache (files have been updated by the processing that just ran).
    st.cache_data.clear()
158
+
159
+
160
# Page-level Streamlit settings: title, icon and wide layout.
st.set_page_config(
    page_title="WSI Analysis System",
    page_icon="🔬",
    layout="wide",
)
161
+
162
# Values of HDF5Detail.status: readable file / file that raised BlockingIOError
# on open / file without the "metadata/patch_count" entry.
STATUS_READY, STATUS_BLOCKED, STATUS_UNSUPPORTED = 0, 1, 2
165
+
166
+
167
def render_reset_button():
    """Render the reset button; ``unlock`` runs as its click callback, then the app reruns."""
    clicked = st.button("リセットする", on_click=unlock)
    if clicked:
        st.rerun()
170
+
171
+
172
def build_output_path(input_path: str, namespace: str, filename: str) -> str:
    """
    Return the path at which ``filename`` should be written for ``input_path``.

    - namespace == "default": next to the input file
    - any other namespace: inside a sub-directory of that name beside the
      input file (the directory is created when missing)
    """
    parent = P(input_path).parent
    if namespace == "default":
        return str(parent / filename)

    target_dir = parent / namespace
    os.makedirs(target_dir, exist_ok=True)
    return str(target_dir / filename)
186
+
187
+
188
def render_navigation(current_dir_abs, default_root_abs):
    """Draw the directory navigation buttons and the model selector.

    Moving up is disabled at ``default_root_abs`` and the parent directory
    is only accepted when it still lies under that root. Picking a different
    model stores it in the session state and reruns the app.
    """
    locked = st.session_state.locked
    with st_horizontal():
        if current_dir_abs == default_root_abs:
            # Already at the root: show the button but keep it inert.
            st.button("↑ 親フォルダへ", disabled=True)
        elif st.button("↑ 親フォルダへ", disabled=locked):
            parent_dir = os.path.dirname(current_dir_abs)
            root_prefix = os.path.commonpath([default_root_abs])
            shared_prefix = os.path.commonpath([default_root_abs, parent_dir])
            if root_prefix == shared_prefix:
                st.session_state.current_dir = parent_dir
                st.rerun()
        if st.button("フォルダ更新", disabled=locked):
            st.rerun()

        labels = list(MODEL_LABELS.values())
        current_label = MODEL_LABELS[st.session_state.model]
        chosen_label = st.selectbox(
            "使用モデル",
            labels,
            index=labels.index(current_label),
            disabled=locked,
        )
        chosen_model = _MODEL_NAMES_BY_LABEL[chosen_label]

        # Reload immediately when the model selection changed.
        if chosen_model != st.session_state.model:
            print("model changed", st.session_state.model, "->", chosen_model)
            st.session_state.model = chosen_model
            st.rerun()
216
+
217
+
218
class HDF5Detail(BaseModel):
    """Summary of one HDF5 file as produced by ``get_hdf5_detail``."""

    # One of STATUS_READY / STATUS_BLOCKED / STATUS_UNSUPPORTED.
    status: int
    # True when "<model>/features" exists and its length equals patch_count.
    has_features: bool
    # Display names of the clustering namespaces found for the model.
    cluster_names: List[str]
    # Values read from the "metadata/*" entries of the file.
    patch_count: int
    mpp: float
    cols: int
    rows: int
    # Optional free-text note (set for blocked files).
    desc: Optional[str] = None
    # Display name -> unique values of that namespace's "clusters" dataset.
    cluster_ids_by_name: Dict[str, List[int]]
228
+
229
+
230
class FileEntry(BaseModel):
    """One row of the file browser."""

    name: str  # display name (icon + file name)
    path: str
    type: str  # one of the FileType constants
    size: int
    modified: datetime
    detail: Optional[HDF5Detail] = None  # filled in for HDF5 files only

    def to_dict(self) -> Dict[str, Any]:
        """Convert the entry into a plain dict for AG Grid."""
        detail_payload = self.detail.model_dump() if self.detail else None
        return dict(
            name=self.name,
            path=self.path,
            type=self.type,
            size=self.size,
            modified=self.modified,
            detail=detail_payload,
        )
248
+
249
+
250
@st.cache_data(ttl=60)
def _load_hdf5_detail(hdf_path: str, model_name: str, mtime: float) -> HDF5Detail:
    """Read the summary of one HDF5 file (cached).

    ``mtime`` is not used in the body; it is a parameter only so that it is
    part of the cache key and a modified file gets a fresh entry.
    """
    from .utils.hdf5_paths import list_namespaces

    try:
        with h5py.File(hdf_path, "r") as f:
            if "metadata/patch_count" not in f:
                return HDF5Detail(
                    status=STATUS_UNSUPPORTED,
                    has_features=False,
                    cluster_names=["未施行"],
                    patch_count=0,
                    mpp=0,
                    cols=0,
                    rows=0,
                    cluster_ids_by_name={},
                )
            patch_count = f["metadata/patch_count"][()]
            # Features count as extracted only when every patch has a row.
            has_features = (f"{model_name}/features" in f) and (len(f[f"{model_name}/features"]) == patch_count)
            cluster_names = ["未施行"]
            if model_name in f:
                # List all namespaces (directories with clusters dataset)
                namespaces = list_namespaces(f, model_name)
                if namespaces:
                    cluster_names = ["デフォルト" if ns == "default" else ns for ns in namespaces]

            cluster_ids_by_name = {}
            for c in cluster_names:
                if c == "未施行":
                    continue
                # Map the display name back to the namespace used inside the file.
                ns = "default" if c == "デフォルト" else c
                k = f"{model_name}/{ns}/clusters"
                if k in f:
                    cluster_ids_by_name[c] = np.unique(f[k][()]).tolist()
            return HDF5Detail(
                status=STATUS_READY,
                has_features=has_features,
                cluster_names=cluster_names,
                patch_count=patch_count,
                mpp=f["metadata/mpp"][()],
                cols=f["metadata/cols"][()],
                rows=f["metadata/rows"][()],
                cluster_ids_by_name=cluster_ids_by_name,
            )
    except BlockingIOError:
        # cluster_ids_by_name is a required field; omitting it here would turn
        # the "blocked" report into a pydantic validation error.
        return HDF5Detail(
            status=STATUS_BLOCKED,
            has_features=False,
            cluster_names=[""],
            patch_count=0,
            mpp=0,
            cols=0,
            rows=0,
            desc="他システムで処理中",
            cluster_ids_by_name={},
        )


def get_hdf5_detail(hdf_path: str, model_name: str, _mtime: float) -> Optional[HDF5Detail]:
    """
    Get the details of an HDF5 file (cached).

    Streamlit leaves parameters whose name starts with an underscore out of
    the cache key, so caching this function directly would ignore ``_mtime``.
    The value is therefore forwarded to a cached helper under a hashed name.

    Args:
        hdf_path: path of the HDF5 file
        model_name: model name
        _mtime: file modification time (used for cache invalidation)

    Returns:
        The file summary. Files without "metadata/patch_count" are reported
        as STATUS_UNSUPPORTED; a BlockingIOError on open as STATUS_BLOCKED.
    """
    return _load_hdf5_detail(hdf_path, model_name, _mtime)
319
+
320
+
321
def list_files(directory) -> List[FileEntry]:
    """List ``directory`` as file-browser entries, directories first.

    Names are sorted within each group. HDF5 files additionally carry the
    summary from ``get_hdf5_detail`` for the currently selected model.
    Entries whose path does not exist (e.g. a dangling symlink) are listed
    with size 0, no detail and a zero timestamp instead of raising.
    """
    directories = []
    files = []

    for item in sorted(os.listdir(directory)):
        item_path = P(os.path.join(directory, item))
        file_type = get_file_type(item_path)
        type_config = FILE_TYPE_CONFIG[file_type]
        display_name = f"{type_config['icon']} {item}"

        if file_type == FileType.DIRECTORY:
            directories.append(
                FileEntry(
                    name=display_name,
                    path=str(item_path),
                    type=file_type,
                    size=0,
                    modified=pd.to_datetime(os.path.getmtime(item_path), unit="s"),
                    detail=None,
                )
            )
            continue

        # Check existence before touching the file: getmtime and the HDF5
        # probe both raise for a path that does not resolve.
        exists = item_path.exists()

        detail = None
        if exists and file_type == FileType.HDF5:
            mtime = os.path.getmtime(item_path)
            detail = get_hdf5_detail(str(item_path), st.session_state.model, mtime)

        files.append(
            FileEntry(
                name=display_name,
                path=str(item_path),
                type=file_type,
                size=os.path.getsize(item_path) if exists else 0,
                modified=pd.to_datetime(os.path.getmtime(item_path), unit="s") if exists else 0,
                detail=detail,
            )
        )

    return directories + files
363
+
364
+
365
def render_file_list(files: List[FileEntry]) -> List[FileEntry]:
    """Show ``files`` in an AG Grid table and return the entries the user selected."""
    if not files:
        st.warning("ファイルが選択されていません")
        return []

    # One grid row per entry, in the same order as ``files``.
    df = pd.DataFrame([entry.to_dict() for entry in files])

    # Grid configuration
    gb = GridOptionsBuilder.from_dataframe(df)

    # Column settings
    gb.configure_column(
        "name",
        header_name="ファイル名",
        width=300,
        sortable=True,
    )

    # Shows the type key as its label.
    type_label_getter = JsCode("""
    function(params) {
        const type = params.data.type;
        const config = {
            'directory': { label: 'フォルダ' },
            'wsi': { label: 'WSI' },
            'hdf5': { label: 'HDF5' },
            'image': { label: '画像' },
            'other': { label: 'その他' }
        };
        const typeConfig = config[type] || config['other'];
        return typeConfig.label;
    }
    """)
    gb.configure_column(
        "type",
        header_name="種別",
        width=100,
        filter="agSetColumnFilter",
        sortable=True,
        valueGetter=type_label_getter,
    )

    # Formats the byte count with a B / KB / MB / GB unit; 0 is shown as blank.
    size_text_getter = JsCode("""
    function(params) {
        const size = params.data.size;
        if (size === 0) return '';
        if (size < 1024) return size + ' B';
        if (size < 1024 * 1024) return (size / 1024).toFixed() + ' KB';
        if (size < 1024 * 1024 * 1024) return (size / (1024 * 1024)).toFixed() + ' MB';
        return (size / (1024 * 1024 * 1024)).toFixed() + ' GB';
    }
    """)
    gb.configure_column(
        "size",
        header_name="ファイルサイズ",
        width=120,
        sortable=True,
        valueGetter=size_text_getter,
    )

    gb.configure_column(
        "modified",
        header_name="最終更新",
        width=180,
        type=["dateColumnFilter", "customDateTimeFormat"],
        custom_format_string="yyyy/MM/dd HH:mm:ss",
        sortable=True,
    )

    # Hide the internal columns
    for internal_column in ("path", "detail"):
        gb.configure_column(internal_column, hide=True)

    # Selection settings
    gb.configure_selection(
        selection_mode="multiple",
        use_checkbox=True,
        header_checkbox=True,
        pre_selected_rows=[],
    )

    # Build the options and display the grid
    grid_response = AgGrid(
        df,
        gridOptions=gb.build(),
        height=400,
        fit_columns_on_grid_load=True,
        allow_unsafe_jscode=True,
        theme="streamlit",
        enable_enterprise_modules=False,
        update_on=["selectionChanged"],
    )

    picked = grid_response["selected_rows"]
    if picked is None:
        return []

    # The index of the selection is converted back to positions in ``files``.
    return [files[int(row_id)] for row_id in picked.index]
462
+
463
+
464
def render_mode_wsi(files: List[FileEntry], selected_files: List[FileEntry]):
    """Render UI for WSI processing mode.

    For every selected WSI this builds the sibling ``.h5`` file (unless a
    ready one is already listed in ``files``), extracts features with the
    selected model (unless already present) and, when requested, runs UMAP
    and clustering and writes ``<name>_umap.png`` / ``<name>_thumb.jpg``
    next to the WSI.

    Args:
        files: every entry of the current directory (used to find existing HDF5 files).
        selected_files: the WSI entries to process.
    """
    model_label = MODEL_LABELS[st.session_state.model]

    st.subheader("WSIをパッチ分割し特徴量を抽出する", divider=True)
    st.write(f"分割したパッチをHDF5に保存し、{model_label}特徴量抽出を実行します。それぞれ5分、20分程度かかります。")

    do_clustering = st.checkbox("クラスタリングも実行する", value=True, disabled=st.session_state.locked)
    rotate_preview = st.checkbox(
        "プレビュー時に回転させる(顕微鏡視野にあわせる)",
        value=True,
        disabled=st.session_state.locked,
    )

    hdf5_paths = []
    if st.button("処理を実行", disabled=st.session_state.locked, on_click=lock):
        set_locked_state(True)
        st.write("WSIから画像をパッチ分割しHDF5ファイルを構築します。")
        with st.container(border=True):
            for i, f in enumerate(selected_files):
                st.write(f"**[{i + 1}/{len(selected_files)}] 処理中のWSIファイル: {f.name}**")
                wsi_path = f.path
                p = P(wsi_path)
                hdf5_path = str(p.with_suffix(".h5"))
                hdf5_tmp_path = str(p.with_suffix(".h5.tmp"))

                # Look for an already existing HDF5 file
                matched_h5_entry = next((entry for entry in files if entry.path == hdf5_path), None)
                if (
                    matched_h5_entry is not None
                    and matched_h5_entry.detail
                    and matched_h5_entry.detail.status == STATUS_READY
                ):
                    st.write(
                        f"すでにHDF5ファイル({os.path.basename(hdf5_path)})が存在しているので分割処理をスキップしました。"
                    )
                else:
                    with st.spinner("WSIを分割しHDF5ファイルを構成しています...", show_time=True):
                        # Write to a temporary name first, then move into place.
                        cmd = commands.Wsi2HDF5Command(patch_size=PATCH_SIZE)
                        _ = cmd(wsi_path, hdf5_tmp_path)
                    # os.replace overwrites an existing (non-ready) .h5 on every
                    # platform; os.rename fails on Windows when the target exists.
                    os.replace(hdf5_tmp_path, hdf5_path)
                    st.write("HDF5ファイルに変換完了。")

                if matched_h5_entry is not None and matched_h5_entry.detail and matched_h5_entry.detail.has_features:
                    st.write(f"すでに{model_label}特徴量を抽出済みなので処理をスキップしました。")
                else:
                    with st.spinner(f"{model_label}特徴量を抽出中...", show_time=True):
                        commands.set_default_model_preset(st.session_state.model)
                        cmd = commands.PatchEmbeddingCommand(batch_size=BATCH_SIZE, overwrite=True)
                        _ = cmd(hdf5_path)
                    st.write(f"{model_label}特徴量の抽出完了。")
                hdf5_paths.append(hdf5_path)
                if i < len(selected_files) - 1:
                    st.divider()

        if do_clustering:
            st.write("クラスタリングを行います。")
            with st.container(border=True):
                for i, (f, hdf5_path) in enumerate(zip(selected_files, hdf5_paths)):
                    st.write(f"**[{i + 1}/{len(selected_files)}] 処理ファイル: {f.name}**")
                    base, _ = os.path.splitext(f.path)
                    umap_path = f"{base}_umap.png"
                    thumb_path = f"{base}_thumb.jpg"
                    with st.spinner("UMAP計算中...", show_time=True):
                        # Compute UMAP first
                        commands.set_default_model_preset(st.session_state.model)
                        umap_cmd = commands.UmapCommand()
                        umap_result = umap_cmd([hdf5_path])

                    with st.spinner("クラスタリング中...", show_time=True):
                        # Cluster using features
                        cluster_cmd = commands.ClusteringCommand(
                            resolution=DEFAULT_CLUSTER_RESOLUTION, namespace="default", source="features"
                        )
                        cluster_result = cluster_cmd([hdf5_path])

                        # Load UMAP embeddings and clusters from HDF5
                        # (handles both fresh computation and skipped cases)
                        with h5py.File(hdf5_path, "r") as hf:
                            umap_embs = hf[umap_result.target_path][:]
                            clusters = hf[cluster_result.target_path][:]
                        # Filter valid (non-NaN for umap, >=0 for clusters)
                        valid_mask = ~np.isnan(umap_embs[:, 0]) & (clusters >= 0)
                        umap_embs = umap_embs[valid_mask]
                        clusters = clusters[valid_mask]

                        fig = plot_scatter_2d(
                            [umap_embs],
                            [clusters],
                            [P(hdf5_path).stem],
                            title="UMAP Projection",
                            xlabel="UMAP 1",
                            ylabel="UMAP 2",
                        )
                        fig.savefig(umap_path, bbox_inches="tight", pad_inches=0.5)
                    st.write(f"クラスタリング結果を{os.path.basename(umap_path)}に出力しました。")

                    with st.spinner("オーバービュー生成中", show_time=True):
                        commands.set_default_model_preset(st.session_state.model)
                        preview_cmd = commands.PreviewClustersCommand(size=THUMBNAIL_SIZE, rotate=rotate_preview)
                        img = preview_cmd(hdf5_path, namespace="default")
                        img.save(thumb_path)
                    st.write(f"オーバービューを{os.path.basename(thumb_path)}に出力しました。")
                    if i < len(selected_files) - 1:
                        st.divider()

        st.write("すべての処理が完了しました。")
        render_reset_button()
575
+
576
+
577
def render_mode_hdf5(selected_files: List[FileEntry]):
    """Render UI for HDF5 analysis mode.

    Shows a summary table of the selected HDF5 files and a form with the
    clustering options. On submit it extracts missing features, runs UMAP
    and clustering over all selected files, saves and displays the UMAP
    plot, then saves and displays one overview image per file.

    Args:
        selected_files: the HDF5 entries to analyse (all must be STATUS_READY).
    """
    model_label = MODEL_LABELS[st.session_state.model]
    st.subheader("HDF5ファイル解析オプション", divider=True)

    # Collect the detail information of the selected files
    details = [{"name": f.name, **f.detail.model_dump()} for f in selected_files if f.detail]
    if len(details) != len(selected_files):
        # An entry without detail cannot be analysed; the code below reads f.detail.
        st.error("サポートされていないHDF5ファイルが含まれています。")
        return
    df_details = pd.DataFrame(details)

    if len(set(df_details["status"])) > 1:
        st.error("サポートされていないHDF5ファイルが含まれています。")
        return
    if np.all(df_details["status"] == STATUS_UNSUPPORTED):
        st.error("サポートされていないHDF5ファイルが選択されました。")
        return
    if np.all(df_details["status"] == STATUS_BLOCKED):
        st.error("他システムで使用されています。")
        return
    if not np.all(df_details["status"] == STATUS_READY):
        st.error("不明な状態です。")
        return

    df_details["has_features"] = df_details["has_features"].map({True: "抽出済み", False: "未抽出"})
    st.dataframe(
        df_details,
        column_config={
            "name": "ファイル名",
            "has_features": "特徴量抽出状況",
            "cluster_names": "クラスタリング処理状況",
            "patch_count": "パッチ数",
            "mpp": "micro/pixel",
            "status": None,
            "desc": None,
            "cluster_ids_by_name": None,
        },
        hide_index=True,
        width="content",
    )

    form = st.form(key="form_hdf5")
    resolution = form.slider(
        "クラスタリング解像度(Leiden resolution)",
        min_value=MIN_CLUSTER_RESOLUTION,
        max_value=MAX_CLUSTER_RESOLUTION,
        value=DEFAULT_CLUSTER_RESOLUTION,
        step=CLUSTER_RESOLUTION_STEP,
        disabled=st.session_state.locked,
    )
    overwrite = form.checkbox(
        "計算済みクラスタ結果を再利用しない(再計算を行う)", value=False, disabled=st.session_state.locked
    )
    source = form.radio(
        "クラスタリングのデータソース",
        options=["features", "umap"],
        index=0,
        disabled=st.session_state.locked,
        help="features: 特徴量ベース(推奨), umap: UMAP座標ベース(事前にUMAP計算が必要)",
    )
    rotate_preview = form.checkbox(
        "プレビュー時に回転させる(顕微鏡視野にあわせる)",
        value=True,
        disabled=st.session_state.locked,
    )

    # Namespace (single file: default, several files: xx+yy+... by default)
    from .utils.hdf5_paths import build_namespace

    default_namespace = build_namespace([f.path for f in selected_files])
    namespace = default_namespace
    if len(selected_files) > 1:
        namespace = form.text_input(
            "名前空間",
            disabled=st.session_state.locked,
            value=default_namespace,
            help="複数スライド処理時の識別名。空欄の場合は自動生成されます。",
        )
        if not namespace:
            namespace = default_namespace

    available_cluster_name = []
    if len(selected_files) == 1:
        available_cluster_name += list(selected_files[0].detail.cluster_ids_by_name.keys())
    else:
        # Keep only the cluster names shared by every file, without the default one
        cluster_name_sets = [set(f.detail.cluster_ids_by_name.keys()) for f in selected_files]
        common_cluster_name_set = set.intersection(*cluster_name_sets)
        common_cluster_name_set -= {"デフォルト"}
        available_cluster_name = list(common_cluster_name_set)

    subcluster_filter = None
    subcluster_label = ""
    if len(available_cluster_name) > 0:
        # Option text "<namespace> - <cluster id>" -> [namespace, cluster id]
        subcluster_targets_map = {}
        subcluster_targets = []
        for f in selected_files:
            for ns_name in available_cluster_name:
                cluster_ids = f.detail.cluster_ids_by_name[ns_name]
                for i in cluster_ids:
                    v = f"{ns_name} - {i}"
                    if v not in subcluster_targets:
                        subcluster_targets.append(v)
                        subcluster_targets_map[v] = [ns_name, i]

        subcluster_targets_result = form.multiselect(
            "サブクラスター対象", subcluster_targets, disabled=st.session_state.locked
        )
        if len(subcluster_targets_result) > 0:
            subcluster_names = []
            subcluster_filter = []
            for r in subcluster_targets_result:
                target_ns, cluster_id = subcluster_targets_map[r]
                subcluster_names.append(target_ns)
                subcluster_filter.append(cluster_id)
            if len(set(subcluster_names)) > 1:
                st.error("サブクラスター対象は同一クラスタリング対象から選んでください")
                render_reset_button()
                return
            subcluster_filter = sorted(subcluster_filter)
            subcluster_label = "+".join([str(i) for i in subcluster_filter])

    if form.form_submit_button("クラスタリングを実行", disabled=st.session_state.locked, on_click=lock):
        set_locked_state(True)

        if len(selected_files) > 1 and namespace != default_namespace:
            # A user-edited namespace must be lower-case ASCII alphanumerics only.
            # fullmatch (unlike match with "$") also rejects a trailing newline.
            if not re.fullmatch(r"[a-z0-9]+", namespace):
                st.error("名前空間は小文字半角英数字のみ入力してください")
                render_reset_button()
                return

        for f in selected_files:
            if not f.detail or not f.detail.has_features:
                st.write(f"{f.name}の特徴量が未抽出なので、抽出を行います。")
                commands.set_default_model_preset(st.session_state.model)
                with st.spinner(f"{model_label}特徴量を抽出中...", show_time=True):
                    cmd = commands.PatchEmbeddingCommand(batch_size=BATCH_SIZE, overwrite=True)
                    _ = cmd(f.path)
                st.write(f"{model_label}特徴量の抽出完了。")

        commands.set_default_model_preset(st.session_state.model)

        # Compute UMAP if needed
        # namespace=None lets the command auto-generate if it contains '+'
        cmd_namespace = None if namespace == default_namespace else namespace
        t = "と".join([f.name for f in selected_files])
        with st.spinner(f"{t}のUMAP計算中...", show_time=True):
            umap_cmd = commands.UmapCommand(
                namespace=cmd_namespace,
                parent_filters=[subcluster_filter] if subcluster_filter else [],
                overwrite=overwrite,
            )
            umap_result = umap_cmd([f.path for f in selected_files])

        # Clustering
        cluster_cmd = commands.ClusteringCommand(
            resolution=resolution,
            namespace=cmd_namespace,
            parent_filters=[subcluster_filter] if subcluster_filter else [],
            source=source,
            overwrite=overwrite,
        )

        with st.spinner(f"{t}をクラスタリング中...", show_time=True):
            # Single file: xx_umap.png, several files: xx+yy/_umap.png
            base = P(selected_files[0].path).stem if namespace == "default" else ""
            suffix = f"_{subcluster_label}" if subcluster_filter else ""
            umap_path = build_output_path(selected_files[0].path, namespace, f"{base}{suffix}_umap.png")

            cluster_result = cluster_cmd([f.path for f in selected_files])

            # Load UMAP embeddings and clusters from HDF5
            # (handles both fresh computation and skipped cases)
            # NOTE(review): only the first selected file is read here even when
            # several files were clustered together - confirm this is intended.
            with h5py.File(selected_files[0].path, "r") as hf:
                umap_embs = hf[umap_result.target_path][:]
                clusters = hf[cluster_result.target_path][:]
            # Filter valid (non-NaN for umap, >=0 for clusters)
            valid_mask = ~np.isnan(umap_embs[:, 0]) & (clusters >= 0)
            umap_embs = umap_embs[valid_mask]
            clusters = clusters[valid_mask]

            filenames = [P(f.path).stem for f in selected_files]

            fig = plot_scatter_2d(
                [umap_embs],
                [clusters],
                filenames,
                title="UMAP Projection",
                xlabel="UMAP 1",
                ylabel="UMAP 2",
            )
            fig.savefig(umap_path, bbox_inches="tight", pad_inches=0.5)

        st.subheader("UMAP投射 + クラスタリング")
        umap_filename = os.path.basename(umap_path)
        # Close the file handle once Streamlit has consumed the image.
        with Image.open(umap_path) as umap_img:
            st.image(umap_img, caption=umap_filename)
        st.write(f"{umap_filename}に出力しました。")

        st.divider()

        with st.spinner("オーバービュー生成中...", show_time=True):
            for f in selected_files:
                commands.set_default_model_preset(st.session_state.model)
                preview_cmd = commands.PreviewClustersCommand(size=THUMBNAIL_SIZE, rotate=rotate_preview)

                p = P(f.path)
                base = p.stem
                if subcluster_filter:
                    base += f"_{subcluster_label}"
                thumb_path = build_output_path(f.path, namespace, f"{base}_thumb.jpg")

                # Determine namespace and filter_path for preview
                ns = namespace if namespace else "default"
                if subcluster_filter:
                    filter_path = "+".join(map(str, subcluster_filter))
                else:
                    filter_path = ""

                thumb = preview_cmd(f.path, namespace=ns, filter_path=filter_path)
                thumb.save(thumb_path)
                st.subheader("オーバービュー")
                thumb_filename = os.path.basename(thumb_path)
                st.image(thumb, caption=thumb_filename)
                st.write(f"{thumb_filename}に出力しました。")

        render_reset_button()
808
+
809
+
810
def recognize_file_type(selected_files: List[FileEntry]) -> str:
    """Reduce a selection to a single ``FileType`` constant.

    Returns:
        ``FileType.EMPTY`` for an empty selection, ``FileType.MIX`` when the
        selected entries have different types, otherwise the one type they
        all share. The constants are plain strings, hence the ``str`` return.
    """
    if not selected_files:
        return FileType.EMPTY

    type_set = {f.type for f in selected_files}
    if len(type_set) > 1:
        return FileType.MIX
    return next(iter(type_set))
822
+
823
+
824
def main():
    """Entry point of the Streamlit page.

    Initialises the session state (lock flag, model, current directory),
    renders the navigation bar and the file list, then dispatches on the
    type of the current selection.
    """
    add_beforeunload_js()

    if "locked" not in st.session_state:
        set_locked_state(False)

    if "model" not in st.session_state:
        st.session_state.model = DEFAULT_MODEL

    st.title("ロビえもんNEXT - WSI AI解析システム")

    if "current_dir" not in st.session_state:
        st.session_state.current_dir = BASE_DIR

    default_root_abs = os.path.abspath(BASE_DIR)
    current_dir_abs = os.path.abspath(st.session_state.current_dir)

    render_navigation(current_dir_abs, default_root_abs)

    files = list_files(st.session_state.current_dir)
    selected_files = render_file_list(files)
    multi = len(selected_files) > 1
    file_type = recognize_file_type(selected_files)

    if file_type == FileType.WSI:
        render_mode_wsi(files, selected_files)
    elif file_type == FileType.HDF5:
        render_mode_hdf5(selected_files)
    elif file_type == FileType.IMAGE:
        for f in selected_files:
            # Close each image file once it has been handed to Streamlit.
            with Image.open(f.path) as img:
                st.image(img)
    elif file_type == FileType.EMPTY:
        st.write("ファイル一覧の左の列のチェックボックスからファイルを選択してください。")
    elif file_type == FileType.DIRECTORY:
        if multi:
            st.warning("複数フォルダが選択されました。")
        else:
            if st.button("このフォルダに移動"):
                st.session_state.current_dir = selected_files[0].path
                st.rerun()
    elif file_type == FileType.OTHER:
        st.warning("WSI(.ndpi, .svs)ファイルもしくはHDF5ファイル(.h5)を選択してください。")
    elif file_type == FileType.MIX:
        st.warning("単一種類のファイルを選択してください。")
    else:
        st.warning(f"Invalid file type: {file_type}")


if __name__ == "__main__":
    main()