wsi-toolbox 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsi_toolbox/__init__.py +122 -0
- wsi_toolbox/app.py +874 -0
- wsi_toolbox/cli.py +599 -0
- wsi_toolbox/commands/__init__.py +66 -0
- wsi_toolbox/commands/clustering.py +198 -0
- wsi_toolbox/commands/data_loader.py +219 -0
- wsi_toolbox/commands/dzi.py +160 -0
- wsi_toolbox/commands/patch_embedding.py +196 -0
- wsi_toolbox/commands/pca.py +206 -0
- wsi_toolbox/commands/preview.py +394 -0
- wsi_toolbox/commands/show.py +171 -0
- wsi_toolbox/commands/umap_embedding.py +174 -0
- wsi_toolbox/commands/wsi.py +223 -0
- wsi_toolbox/common.py +148 -0
- wsi_toolbox/models.py +30 -0
- wsi_toolbox/utils/__init__.py +109 -0
- wsi_toolbox/utils/analysis.py +174 -0
- wsi_toolbox/utils/hdf5_paths.py +232 -0
- wsi_toolbox/utils/plot.py +227 -0
- wsi_toolbox/utils/progress.py +207 -0
- wsi_toolbox/utils/seed.py +26 -0
- wsi_toolbox/utils/st.py +55 -0
- wsi_toolbox/utils/white.py +121 -0
- wsi_toolbox/watcher.py +256 -0
- wsi_toolbox/wsi_files.py +619 -0
- wsi_toolbox-0.2.0.dist-info/METADATA +253 -0
- wsi_toolbox-0.2.0.dist-info/RECORD +30 -0
- wsi_toolbox-0.2.0.dist-info/WHEEL +4 -0
- wsi_toolbox-0.2.0.dist-info/entry_points.txt +3 -0
- wsi_toolbox-0.2.0.dist-info/licenses/LICENSE +21 -0
wsi_toolbox/cli.py
ADDED
|
@@ -0,0 +1,599 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import warnings
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from pathlib import Path as P
|
|
5
|
+
|
|
6
|
+
import h5py
|
|
7
|
+
import numpy as np
|
|
8
|
+
from matplotlib import pyplot as plt
|
|
9
|
+
from PIL import Image
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
from pydantic_autocli import AutoCLI, param
|
|
12
|
+
|
|
13
|
+
from . import commands, common
|
|
14
|
+
from .utils.hdf5_paths import build_cluster_path
|
|
15
|
+
from .utils.plot import plot_scatter_2d, plot_violin_1d
|
|
16
|
+
from .utils.seed import fix_global_seed, get_global_seed
|
|
17
|
+
from .utils.white import create_white_detector
|
|
18
|
+
from .wsi_files import create_wsi_file
|
|
19
|
+
|
|
20
|
+
# Silence known-noisy FutureWarnings: the `force_all_finite` rename
# (presumably from scikit-learn — TODO confirm) and torch.load's
# `weights_only=False` deprecation notice.
warnings.filterwarnings("ignore", category=FutureWarning, message=".*force_all_finite.*")
warnings.filterwarnings(
    "ignore", category=FutureWarning, message="You are using `torch.load` with `weights_only=False`"
)

# Default model preset name; overridable via the DEFAULT_MODEL env var.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "uni")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def build_output_path(input_path: str, namespace: str, filename: str) -> str:
    """
    Build output path based on namespace.

    - namespace="default": save in same directory as input file
    - namespace=other: save in namespace subdirectory (created if needed)

    Args:
        input_path: Input file path (used to determine base directory)
        namespace: Namespace string
        filename: Output filename (with extension)

    Returns:
        Full output path
    """
    p = P(input_path)
    if namespace == "default":
        output_dir = p.parent
    else:
        output_dir = p.parent / namespace
        # pathlib idiom; equivalent to os.makedirs(output_dir, exist_ok=True)
        output_dir.mkdir(parents=True, exist_ok=True)
    return str(output_dir / filename)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# CLI-wide defaults applied at import time: tqdm progress bars, the model
# preset from DEFAULT_MODEL, and the "tab20" colormap for cluster colors.
common.set_default_progress("tqdm")
common.set_default_model_preset(DEFAULT_MODEL)
common.set_default_cluster_cmap("tab20")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class CLI(AutoCLI):
|
|
58
|
+
class CommonArgs(BaseModel):
|
|
59
|
+
seed: int = get_global_seed()
|
|
60
|
+
model: str = param(DEFAULT_MODEL, l="--model-name", s="-M")
|
|
61
|
+
pass
|
|
62
|
+
|
|
63
|
+
    def prepare(self, a: CommonArgs):
        # Runs before each subcommand: pin the global RNG seed and push the
        # chosen model preset into the shared config so commands pick it up.
        fix_global_seed(a.seed)
        common.set_default_model_preset(a.model)
|
|
66
|
+
|
|
67
|
+
def _parse_white_detect(self, detect_white: list[str]) -> tuple[str, float | None]:
|
|
68
|
+
"""
|
|
69
|
+
Parse white detection arguments
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
detect_white: List of strings [method, threshold]
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
Tuple of (method, threshold). threshold is None for default.
|
|
76
|
+
|
|
77
|
+
Raises:
|
|
78
|
+
ValueError: If arguments are invalid
|
|
79
|
+
"""
|
|
80
|
+
if not detect_white or len(detect_white) == 0:
|
|
81
|
+
# Default: ptp with default threshold
|
|
82
|
+
return ("ptp", None)
|
|
83
|
+
|
|
84
|
+
method = detect_white[0]
|
|
85
|
+
|
|
86
|
+
# Validate method
|
|
87
|
+
valid_methods = ("ptp", "otsu", "std", "green")
|
|
88
|
+
if method not in valid_methods:
|
|
89
|
+
raise ValueError(f"Invalid method '{method}'. Must be one of {valid_methods}")
|
|
90
|
+
|
|
91
|
+
if len(detect_white) == 1:
|
|
92
|
+
return (method, None)
|
|
93
|
+
|
|
94
|
+
try:
|
|
95
|
+
threshold = float(detect_white[1])
|
|
96
|
+
except ValueError:
|
|
97
|
+
raise ValueError(f"Invalid threshold value '{detect_white[1]}'. Must be a number.")
|
|
98
|
+
|
|
99
|
+
return (method, threshold)
|
|
100
|
+
|
|
101
|
+
    class Wsi2h5Args(CommonArgs):
        # Arguments for the wsi2h5 subcommand (WSI -> patch-based HDF5).
        device: str = "cuda"
        input_path: str = param(..., l="--in", s="-i")
        output_path: str = param("", l="--out", s="-o")  # empty -> derived from input path
        patch_size: int = param(256, s="-S")
        overwrite: bool = param(False, s="-O")
        engine: str = param("auto", choices=["auto", "openslide", "tifffile"])
        mpp: float = 0.5
        rotate: bool = False
        no_temp: bool = Field(False, description="Don't use temporary file (less safe)")
        detect_white: list[str] = Field(
            [], l="--detect-white", s="-w", description="White detection: method threshold (e.g., 'ptp 0.9')"
        )
|
|
114
|
+
|
|
115
|
+
def run_wsi2h5(self, a: Wsi2h5Args):
|
|
116
|
+
commands.set_default_device(a.device)
|
|
117
|
+
output_path = a.output_path
|
|
118
|
+
|
|
119
|
+
if not output_path:
|
|
120
|
+
base, ext = os.path.splitext(a.input_path)
|
|
121
|
+
output_path = base + ".h5"
|
|
122
|
+
|
|
123
|
+
tmp_path = output_path + ".tmp"
|
|
124
|
+
|
|
125
|
+
if os.path.exists(output_path):
|
|
126
|
+
if not a.overwrite:
|
|
127
|
+
print(f"{output_path} exists. Skipping.")
|
|
128
|
+
return
|
|
129
|
+
print(f"{output_path} exists but overwriting it.")
|
|
130
|
+
|
|
131
|
+
d = os.path.dirname(output_path)
|
|
132
|
+
if d:
|
|
133
|
+
os.makedirs(d, exist_ok=True)
|
|
134
|
+
|
|
135
|
+
# Parse white detection settings and create detector function
|
|
136
|
+
white_method, white_threshold = self._parse_white_detect(a.detect_white)
|
|
137
|
+
white_detector = create_white_detector(white_method, white_threshold)
|
|
138
|
+
|
|
139
|
+
print("Output path:", output_path)
|
|
140
|
+
print("Temporary path:", tmp_path)
|
|
141
|
+
print(
|
|
142
|
+
f"White detection: {white_method} (threshold: {white_threshold if white_threshold is not None else 'default'})"
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
# Use new command pattern (progress is auto-set from global config)
|
|
146
|
+
cmd = commands.Wsi2HDF5Command(
|
|
147
|
+
patch_size=a.patch_size,
|
|
148
|
+
engine=a.engine,
|
|
149
|
+
mpp=a.mpp,
|
|
150
|
+
rotate=a.rotate,
|
|
151
|
+
white_detector=white_detector,
|
|
152
|
+
)
|
|
153
|
+
result = cmd(a.input_path, tmp_path)
|
|
154
|
+
|
|
155
|
+
os.rename(tmp_path, output_path)
|
|
156
|
+
print("Renamed ", tmp_path, " -> ", output_path)
|
|
157
|
+
print(f"done: {result.patch_count} patches extracted")
|
|
158
|
+
|
|
159
|
+
    class EmbedArgs(CommonArgs):
        # Arguments for the embed subcommand (patch feature extraction).
        input_path: str = Field(..., l="--in", s="-i")
        batch_size: int = Field(512, s="-B")
        overwrite: bool = Field(False, s="-O")
        with_latent_features: bool = Field(False, s="-L")
|
|
164
|
+
|
|
165
|
+
def run_embed(self, a: EmbedArgs):
|
|
166
|
+
# Use new command pattern
|
|
167
|
+
cmd = commands.PatchEmbeddingCommand(
|
|
168
|
+
batch_size=a.batch_size, with_latent=a.with_latent_features, overwrite=a.overwrite
|
|
169
|
+
)
|
|
170
|
+
result = cmd(a.input_path)
|
|
171
|
+
|
|
172
|
+
if not result.skipped:
|
|
173
|
+
print(f"done: {result.feature_dim}D features extracted")
|
|
174
|
+
|
|
175
|
+
    class ClusterArgs(CommonArgs):
        # Arguments for the cluster subcommand.
        input_paths: list[str] = Field(..., l="--in", s="-i")
        namespace: str = Field("", l="--namespace", s="-N", description="Namespace (auto-generated if empty)")
        filter_ids: list[int] = Field([], l="--filter", s="-f", description="Filter cluster IDs")
        resolution: float = Field(1.0, description="Clustering resolution")
        source: str = Field("features", choices=["features", "umap"], description="Data source")
        no_sort: bool = Field(False, l="--no-sort", description="Disable cluster ID reordering by PCA")
        overwrite: bool = Field(False, s="-O")
|
|
183
|
+
|
|
184
|
+
def run_cluster(self, a: ClusterArgs):
|
|
185
|
+
# Build parent_filters
|
|
186
|
+
parent_filters = [a.filter_ids] if len(a.filter_ids) > 0 else []
|
|
187
|
+
|
|
188
|
+
# Execute clustering
|
|
189
|
+
cmd = commands.ClusteringCommand(
|
|
190
|
+
resolution=a.resolution,
|
|
191
|
+
namespace=a.namespace if a.namespace else None,
|
|
192
|
+
parent_filters=parent_filters,
|
|
193
|
+
source=a.source,
|
|
194
|
+
sort_clusters=not a.no_sort,
|
|
195
|
+
overwrite=a.overwrite,
|
|
196
|
+
)
|
|
197
|
+
result = cmd(a.input_paths)
|
|
198
|
+
|
|
199
|
+
if result.skipped:
|
|
200
|
+
print(f"⊘ Skipped (already exists): {result.target_path}")
|
|
201
|
+
else:
|
|
202
|
+
print("✓ Clustering completed")
|
|
203
|
+
print(f" Clusters: {result.cluster_count}")
|
|
204
|
+
print(f" Samples: {result.feature_count}")
|
|
205
|
+
print(f" Path: {result.target_path}")
|
|
206
|
+
|
|
207
|
+
    class UmapArgs(CommonArgs):
        # Arguments for the umap subcommand (embedding + optional plot).
        input_paths: list[str] = Field(..., l="--in", s="-i")
        namespace: str = Field("", l="--namespace", s="-N", description="Namespace (auto-generated if empty)")
        filter_ids: list[int] = Field([], l="--filter", s="-f", description="Filter cluster IDs")
        n_neighbors: int = Field(15, description="UMAP n_neighbors")
        min_dist: float = Field(0.1, description="UMAP min_dist")
        use_parent_clusters: bool = Field(False, l="--parent", s="-P", description="Use parent clusters for plotting")
        overwrite: bool = param(False, s="-O")
        save: bool = Field(False, description="Save plot to file")
        show: bool = Field(False, description="Show UMAP plot")
|
|
217
|
+
|
|
218
|
+
    def run_umap(self, a: UmapArgs):
        """Compute (or reuse) a 2-D UMAP embedding, then optionally plot it.

        Coordinates are written into each input HDF5 file by UmapCommand and
        read back here together with cluster labels for a combined plot.
        """
        # Build parent_filters if filter_ids specified
        parent_filters = [a.filter_ids] if len(a.filter_ids) > 0 else []

        # Create UMAP command
        cmd = commands.UmapCommand(
            namespace=a.namespace if a.namespace else None,
            parent_filters=parent_filters,
            n_components=2,
            n_neighbors=a.n_neighbors,
            min_dist=a.min_dist,
            overwrite=a.overwrite,
        )
        result = cmd(a.input_paths)

        if result.skipped:
            print(f"⊘ Skipped (already exists): {result.target_path}")
        else:
            print(f"✓ UMAP computed: {result.n_samples} samples → 2D")
            print(f"  Path: {result.target_path}")

        # Determine namespace (fall back to the one auto-generated by the command)
        namespace = a.namespace if a.namespace else cmd.namespace

        # HDF5 path of the cluster labels used for coloring; --parent selects
        # the unfiltered parent clustering instead of the filtered one.
        cluster_path = build_cluster_path(
            a.model, namespace, filters=None if a.use_parent_clusters else parent_filters, dataset="clusters"
        )

        # Check if clusters exist (only the first input file is checked here)
        with h5py.File(a.input_paths[0], "r") as f:
            if cluster_path not in f:
                if a.use_parent_clusters:
                    print(f"Error: Parent clusters not found at {cluster_path}")
                else:
                    print(f"Error: Sub-clusters not found at {cluster_path}")
                    if parent_filters:
                        print("Hint: Run clustering with same filter first, or use --parent to use parent clusters")
                return

        # Load UMAP coordinates and clusters from all files
        coords_list = []
        clusters_list = []
        filenames = []

        for hdf5_path in a.input_paths:
            with h5py.File(hdf5_path, "r") as f:
                # Check if both datasets exist
                if result.target_path not in f:
                    print(f"Error: UMAP coordinates not found in {hdf5_path}")
                    continue
                if cluster_path not in f:
                    print(f"Error: Clusters not found in {hdf5_path}")
                    continue

                umap_coords = f[result.target_path][:]
                clusters = f[cluster_path][:]

                # Check lengths match
                if len(umap_coords) != len(clusters):
                    print(
                        f"Error: Length mismatch in {hdf5_path}: "
                        f"UMAP coords={len(umap_coords)}, clusters={len(clusters)}"
                    )
                    continue

                # Filter out NaN (rows whose first coordinate is NaN are dropped)
                valid_mask = ~np.isnan(umap_coords[:, 0])
                valid_coords = umap_coords[valid_mask]
                valid_clusters = clusters[valid_mask]

                coords_list.append(valid_coords)
                clusters_list.append(valid_clusters)
                filenames.append(Path(hdf5_path).stem)

        # Check if we have any valid data
        if len(coords_list) == 0:
            print("No valid data to plot.")
            return

        if (not a.save) and (not a.show):
            # No need to plot
            return

        # Plot
        plot_scatter_2d(
            coords_list,
            clusters_list,
            filenames,
            title="UMAP Projection",
            xlabel="UMAP 1",
            ylabel="UMAP 2",
        )

        if a.save:
            # Build filename (stem is blank when plotting multiple inputs)
            base_name = P(a.input_paths[0]).stem if len(a.input_paths) == 1 else ""
            if a.filter_ids:
                filename = f"{base_name}_{'+'.join(map(str, a.filter_ids))}_umap.png"
            else:
                filename = f"{base_name}_umap.png"

            fig_path = build_output_path(a.input_paths[0], namespace, filename)
            plt.savefig(fig_path)
            print(f"wrote {fig_path}")

        if a.show:
            plt.show()
|
|
325
|
+
|
|
326
|
+
    class PcaArgs(CommonArgs):
        # Arguments for the pca subcommand (PCA scores + optional plot).
        input_paths: list[str] = Field(..., l="--in", s="-i")
        namespace: str = Field("", l="--namespace", s="-N", description="Namespace (auto-generated if empty)")
        filter_ids: list[int] = Field([], l="--filter", s="-f", description="Filter cluster IDs")
        n_components: int = Field(1, s="-n", description="Number of PCA components (1, 2, or 3)")
        scaler: str = Field("minmax", s="-s", choices=["std", "minmax"], description="Scaling method")
        overwrite: bool = Field(False, s="-O")
        show: bool = Field(False, description="Show PCA plot")
        save: bool = Field(False, description="Save plot to file")
        use_parent_clusters: bool = Field(False, l="--parent", s="-P", description="Use parent clusters for plotting")
|
|
336
|
+
|
|
337
|
+
    def run_pca(self, a: PcaArgs):
        """Compute (or reuse) PCA scores, then optionally plot them.

        1-D PCA is rendered as per-cluster violin plots, 2-D as a scatter
        plot; higher dimensions are computed but not plotted.
        """
        # Build parent_filters
        parent_filters = [a.filter_ids] if len(a.filter_ids) > 0 else []

        # Execute PCA command
        cmd = commands.PCACommand(
            n_components=a.n_components,
            namespace=a.namespace if a.namespace else None,
            parent_filters=parent_filters,
            scaler=a.scaler,
            overwrite=a.overwrite,
        )
        result = cmd(a.input_paths)

        if result.skipped:
            print(f"⊘ Skipped (already exists): {result.target_path}")
        else:
            print("✓ PCA computed")
            print(f"  Components: {result.n_components}")
            print(f"  Samples: {result.n_samples}")
            print(f"  Path: {result.target_path}")

        # Determine namespace (fall back to the one auto-generated by the command)
        namespace = a.namespace if a.namespace else cmd.namespace

        # Cluster labels used for grouping/coloring; --parent selects the
        # unfiltered parent clustering.
        cluster_path = build_cluster_path(
            a.model, namespace, filters=None if a.use_parent_clusters else parent_filters, dataset="clusters"
        )

        # Check if clusters exist (only the first input file is checked here)
        with h5py.File(a.input_paths[0], "r") as f:
            if cluster_path not in f:
                if a.use_parent_clusters:
                    print(f"Error: Parent clusters not found at {cluster_path}")
                else:
                    print(f"Error: Sub-clusters not found at {cluster_path}")
                    if parent_filters:
                        print("Hint: Run clustering with same filter first, or use --parent to use parent clusters")
                return

        if a.n_components not in [1, 2]:
            print("Plotting only supported for 1D or 2D PCA")
            return

        # Load PCA values and clusters from all files
        pca_list = []
        clusters_list = []
        filenames = []

        for hdf5_path in a.input_paths:
            with h5py.File(hdf5_path, "r") as f:
                # Check if both datasets exist
                if result.target_path not in f:
                    print(f"Error: PCA values not found in {hdf5_path}")
                    continue
                if cluster_path not in f:
                    print(f"Error: Clusters not found in {hdf5_path}")
                    continue

                pca_values = f[result.target_path][:]
                clusters = f[cluster_path][:]

                # Check lengths match
                if len(pca_values) != len(clusters):
                    print(f"Error: Length mismatch in {hdf5_path}: PCA={len(pca_values)}, clusters={len(clusters)}")
                    continue

                # Filter out NaN (the masking below implies 1-D scores are a
                # flat array while 2-D scores are row vectors)
                if a.n_components == 1:
                    valid_mask = ~np.isnan(pca_values)
                else:
                    valid_mask = ~np.isnan(pca_values[:, 0])

                valid_pca = pca_values[valid_mask]
                valid_clusters = clusters[valid_mask]

                pca_list.append(valid_pca)
                clusters_list.append(valid_clusters)
                filenames.append(Path(hdf5_path).stem)

        # Check if we have any valid data
        if len(pca_list) == 0:
            print("No valid data to plot.")
            return

        if (not a.save) and (not a.show):
            # No need to plot
            return

        # Plot based on dimensionality
        if a.n_components == 1:
            # Violin plot for 1D PCA
            plot_violin_1d(
                pca_list,
                clusters_list,
                title="Distribution of PCA Values by Cluster",
                ylabel="PCA Value",
            )
        elif a.n_components == 2:
            # Scatter plot for 2D PCA
            plot_scatter_2d(
                pca_list,
                clusters_list,
                filenames,
                title="PCA Projection",
                xlabel="PCA 1",
                ylabel="PCA 2",
            )

        if a.save:
            # Build filename (stem is blank when multiple inputs were given)
            base_name = P(a.input_paths[0]).stem if len(a.input_paths) == 1 else ""
            if a.filter_ids:
                filename = f"{base_name}_{'+'.join(map(str, a.filter_ids))}_pca{a.n_components}.png"
            else:
                filename = f"{base_name}_pca{a.n_components}.png"

            fig_path = build_output_path(a.input_paths[0], namespace, filename)
            plt.savefig(fig_path)
            print(f"wrote {fig_path}")

        if a.show:
            plt.show()
|
|
460
|
+
|
|
461
|
+
    class PreviewArgs(CommonArgs):
        # Arguments for the preview subcommand (cluster-colored patch image).
        input_path: str = Field(..., l="--in", s="-i")
        output_path: str = Field("", l="--out", s="-o")  # empty -> derived from input
        namespace: str = Field("default", l="--namespace", s="-N")
        filter_ids: list[int] = Field([], l="--filter", s="-f", description="Filter cluster IDs")
        size: int = 64
        rotate: bool = False
        open: bool = False  # open the written image with xdg-open
|
|
469
|
+
|
|
470
|
+
def run_preview(self, a):
|
|
471
|
+
output_path = a.output_path
|
|
472
|
+
filter_str = ""
|
|
473
|
+
if not output_path:
|
|
474
|
+
base_name = P(a.input_path).stem
|
|
475
|
+
if len(a.filter_ids) > 0:
|
|
476
|
+
filter_str = "+".join(map(str, a.filter_ids))
|
|
477
|
+
filename = f"{base_name}_{filter_str}_preview.jpg"
|
|
478
|
+
else:
|
|
479
|
+
filename = f"{base_name}_preview.jpg"
|
|
480
|
+
output_path = build_output_path(a.input_path, a.namespace, filename)
|
|
481
|
+
|
|
482
|
+
cmd = commands.PreviewClustersCommand(size=a.size, model_name=a.model, rotate=a.rotate)
|
|
483
|
+
img = cmd(a.input_path, namespace=a.namespace, filter_path=filter_str)
|
|
484
|
+
img.save(output_path)
|
|
485
|
+
print(f"wrote {output_path}")
|
|
486
|
+
|
|
487
|
+
if a.open:
|
|
488
|
+
os.system(f"xdg-open {output_path}")
|
|
489
|
+
|
|
490
|
+
    class PreviewPcaArgs(CommonArgs):
        # Arguments for the preview-pca subcommand (score-colored patch image).
        input_path: str = Field(..., l="--in", s="-i")
        output_path: str = Field("", l="--out", s="-o")  # empty -> derived from input
        score_name: str = Field(..., l="--name", s="-n", description="Score name (e.g., 'pca1', 'pca2')")
        namespace: str = Field("default", l="--namespace", s="-N", description="Namespace")
        filter_ids: list[int] = Field([], l="--filter", s="-f", description="Filter cluster IDs")
        cmap: str = Field("viridis", l="--cmap", s="-c", description="Colormap name")
        invert: bool = Field(False, l="--invert", s="-I", description="Invert scores (1 - score)")
        size: int = 64
        rotate: bool = False
        open: bool = False  # open the written image with xdg-open
|
|
501
|
+
|
|
502
|
+
def run_preview_pca(self, a):
|
|
503
|
+
output_path = a.output_path
|
|
504
|
+
filter_str = ""
|
|
505
|
+
if not output_path:
|
|
506
|
+
base_name = P(a.input_path).stem
|
|
507
|
+
if len(a.filter_ids) > 0:
|
|
508
|
+
filter_str = "+".join(map(str, a.filter_ids))
|
|
509
|
+
filename = f"{base_name}_{filter_str}_{a.score_name}_preview.jpg"
|
|
510
|
+
else:
|
|
511
|
+
filename = f"{base_name}_{a.score_name}_preview.jpg"
|
|
512
|
+
output_path = build_output_path(a.input_path, a.namespace, filename)
|
|
513
|
+
|
|
514
|
+
cmd = commands.PreviewScoresCommand(size=a.size, model_name=a.model, rotate=a.rotate)
|
|
515
|
+
img = cmd(
|
|
516
|
+
a.input_path,
|
|
517
|
+
score_name=a.score_name,
|
|
518
|
+
namespace=a.namespace,
|
|
519
|
+
filter_path=filter_str,
|
|
520
|
+
cmap_name=a.cmap,
|
|
521
|
+
invert=a.invert,
|
|
522
|
+
)
|
|
523
|
+
img.save(output_path)
|
|
524
|
+
print(f"wrote {output_path}")
|
|
525
|
+
|
|
526
|
+
if a.open:
|
|
527
|
+
os.system(f"xdg-open {output_path}")
|
|
528
|
+
|
|
529
|
+
    class ShowArgs(CommonArgs):
        # Arguments for the show subcommand (HDF5 inspection).
        input_path: str = Field(..., l="--in", s="-i", description="HDF5 file path")
        verbose: bool = Field(False, s="-v", description="Show detailed info")
|
|
532
|
+
|
|
533
|
+
def run_show(self, a: ShowArgs):
|
|
534
|
+
"""Show HDF5 file structure and contents"""
|
|
535
|
+
cmd = commands.ShowCommand(verbose=a.verbose)
|
|
536
|
+
cmd(a.input_path)
|
|
537
|
+
|
|
538
|
+
    class DziArgs(CommonArgs):
        # Arguments for the dzi subcommand (Deep Zoom export).
        input_wsi: str = Field(..., l="--input", s="-i", description="Input WSI file path")
        output_dir: str = Field(..., l="--output", s="-o", description="Output directory")
        tile_size: int = Field(256, l="--tile-size", s="-t", description="Tile size in pixels")
        overlap: int = Field(0, l="--overlap", description="Tile overlap in pixels")
        jpeg_quality: int = Field(90, s="-q", description="JPEG quality (1-100)")
|
|
544
|
+
|
|
545
|
+
def run_dzi(self, a: DziArgs):
|
|
546
|
+
"""Export WSI to Deep Zoom Image (DZI) format for OpenSeadragon"""
|
|
547
|
+
|
|
548
|
+
# Get name from WSI filename
|
|
549
|
+
name = P(a.input_wsi).stem
|
|
550
|
+
|
|
551
|
+
# Use specified output directory as-is
|
|
552
|
+
output_dir = P(a.output_dir)
|
|
553
|
+
|
|
554
|
+
cmd = commands.DziCommand(
|
|
555
|
+
tile_size=a.tile_size,
|
|
556
|
+
overlap=a.overlap,
|
|
557
|
+
jpeg_quality=a.jpeg_quality,
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
result = cmd(wsi_path=a.input_wsi, output_dir=str(output_dir), name=name)
|
|
561
|
+
|
|
562
|
+
print(f"Export completed: {result.dzi_path}")
|
|
563
|
+
|
|
564
|
+
    class ThumbArgs(CommonArgs):
        # Arguments for the thumb subcommand (WSI thumbnail).
        input_path: str = Field(..., l="--in", s="-i", description="Input WSI file path")
        output_path: str = Field("", l="--out", s="-o", description="Output path")
        width: int = Field(-1, s="-w", description="Width (-1 for auto)")
        # NOTE(review): short flag -h may collide with --help depending on how
        # pydantic_autocli builds the parser — verify.
        height: int = Field(-1, s="-h", description="Height (-1 for auto)")
        quality: int = Field(90, s="-q", description="JPEG quality (1-100)")
        open: bool = False  # open the written image with xdg-open
|
|
571
|
+
|
|
572
|
+
def run_thumb(self, a: ThumbArgs):
|
|
573
|
+
"""Generate thumbnail from WSI"""
|
|
574
|
+
wsi = create_wsi_file(a.input_path)
|
|
575
|
+
|
|
576
|
+
thumb_array = wsi.generate_thumbnail(width=a.width, height=a.height)
|
|
577
|
+
actual_h, actual_w = thumb_array.shape[:2]
|
|
578
|
+
|
|
579
|
+
output_path = a.output_path
|
|
580
|
+
if not output_path:
|
|
581
|
+
stem = P(a.input_path).stem
|
|
582
|
+
output_path = str(P(a.input_path).parent / f"{stem}_thumb_{actual_w}x{actual_h}.jpg")
|
|
583
|
+
|
|
584
|
+
img = Image.fromarray(thumb_array)
|
|
585
|
+
img.save(output_path, "JPEG", quality=a.quality)
|
|
586
|
+
print(f"wrote {output_path}")
|
|
587
|
+
|
|
588
|
+
if a.open:
|
|
589
|
+
os.system(f"xdg-open {output_path}")
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def main():
    """Entry point for wsi-toolbox CLI command."""
    CLI().run()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-based processors for WSI analysis pipeline.
|
|
3
|
+
|
|
4
|
+
Design pattern: __init__ for configuration, __call__ for execution
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Import configuration from common module
|
|
8
|
+
from ..common import (
|
|
9
|
+
Config,
|
|
10
|
+
_get,
|
|
11
|
+
_get_cluster_color,
|
|
12
|
+
_progress,
|
|
13
|
+
get_config,
|
|
14
|
+
set_default_cluster_cmap,
|
|
15
|
+
set_default_device,
|
|
16
|
+
set_default_model,
|
|
17
|
+
set_default_model_preset,
|
|
18
|
+
set_default_progress,
|
|
19
|
+
set_verbose,
|
|
20
|
+
)
|
|
21
|
+
from .clustering import ClusteringCommand
|
|
22
|
+
from .dzi import DziCommand
|
|
23
|
+
from .patch_embedding import PatchEmbeddingCommand
|
|
24
|
+
from .pca import PCACommand
|
|
25
|
+
from .preview import (
|
|
26
|
+
BasePreviewCommand,
|
|
27
|
+
PreviewClustersCommand,
|
|
28
|
+
PreviewLatentClusterCommand,
|
|
29
|
+
PreviewLatentPCACommand,
|
|
30
|
+
PreviewScoresCommand,
|
|
31
|
+
)
|
|
32
|
+
from .show import ShowCommand
|
|
33
|
+
from .umap_embedding import UmapCommand
|
|
34
|
+
|
|
35
|
+
# Import and export all commands
|
|
36
|
+
from .wsi import Wsi2HDF5Command
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
# Config
|
|
40
|
+
"Config",
|
|
41
|
+
"get_config",
|
|
42
|
+
# Config setters
|
|
43
|
+
"set_default_progress",
|
|
44
|
+
"set_default_model",
|
|
45
|
+
"set_default_model_preset",
|
|
46
|
+
"set_default_device",
|
|
47
|
+
"set_verbose",
|
|
48
|
+
"set_default_cluster_cmap",
|
|
49
|
+
# Helper functions
|
|
50
|
+
"_get",
|
|
51
|
+
"_get_cluster_color",
|
|
52
|
+
"_progress",
|
|
53
|
+
# Commands
|
|
54
|
+
"Wsi2HDF5Command",
|
|
55
|
+
"PatchEmbeddingCommand",
|
|
56
|
+
"UmapCommand",
|
|
57
|
+
"ClusteringCommand",
|
|
58
|
+
"PCACommand",
|
|
59
|
+
"BasePreviewCommand",
|
|
60
|
+
"PreviewClustersCommand",
|
|
61
|
+
"PreviewScoresCommand",
|
|
62
|
+
"PreviewLatentPCACommand",
|
|
63
|
+
"PreviewLatentClusterCommand",
|
|
64
|
+
"DziCommand",
|
|
65
|
+
"ShowCommand",
|
|
66
|
+
]
|