wsi-toolbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ """
2
+ Preview generation commands using Template Method Pattern
3
+ """
4
+
5
+ import h5py
6
+ import numpy as np
7
+ from matplotlib import colors as mcolors
8
+ from matplotlib import pyplot as plt
9
+ from PIL import Image, ImageFont
10
+
11
+ from ..utils import create_frame, get_platform_font
12
+ from ..utils.hdf5_paths import build_cluster_path
13
+ from . import _get, _get_cluster_color, _progress
14
+
15
+
16
class BasePreviewCommand:
    """
    Base class for preview commands using Template Method Pattern

    Subclasses must implement:
    - _prepare(f, **kwargs): Prepare data (frames, scores, etc.)
    - _get_frame(index, data, f): Get frame for specific patch
    """

    def __init__(self, size: int = 64, font_size: int = 16, model_name: str | None = None, rotate: bool = False):
        """
        Initialize preview command

        Args:
            size: Thumbnail patch size
            font_size: Font size for labels
            model_name: Model name (None to use global default)
            rotate: Whether to rotate patches 180 degrees
        """
        self.size = size
        self.font_size = font_size
        self.model_name = _get("model_name", model_name)
        self.rotate = rotate

    def __call__(self, hdf5_path: str, **kwargs) -> Image.Image:
        """
        Template method - common workflow shared by all preview commands.

        Args:
            hdf5_path: Path to HDF5 file
            **kwargs: Forwarded to the subclass's _prepare()

        Returns:
            PIL.Image: Assembled thumbnail image
        """
        side = self.size

        with h5py.File(hdf5_path, "r") as f:
            cols, rows, patch_count, patch_size = self._load_metadata(f)

            # Subclass-specific preparation (frames, scores, overlays, ...)
            data = self._prepare(f, **kwargs)

            canvas = Image.new("RGB", (cols * side, rows * side), (0, 0, 0))

            # Common rendering loop over every stored patch
            for idx in _progress(range(patch_count)):
                # Pixel coordinate -> grid cell -> thumbnail pixel position
                x, y = f["coordinates"][idx] // patch_size * side
                patch = Image.fromarray(f["patches"][idx]).resize((side, side))

                if self.rotate:
                    # 180-degree rotation: spin the patch and mirror its grid position
                    patch = patch.rotate(180)
                    x = (cols - 1) * side - x
                    y = (rows - 1) * side - y

                # Subclass-specific overlay frame (may be None)
                frame = self._get_frame(idx, data, f)
                if frame:
                    patch.paste(frame, (0, 0), frame)

                canvas.paste(patch, (x, y, x + side, y + side))

        return canvas

    def _load_metadata(self, f: h5py.File):
        """Read grid geometry (cols, rows) and patch info (count, size) from metadata."""
        keys = ("cols", "rows", "patch_count", "patch_size")
        return tuple(f[f"metadata/{key}"][()] for key in keys)

    def _prepare(self, f: h5py.File, **kwargs):
        """
        Prepare data for rendering (implemented by subclass).

        Args:
            f: HDF5 file handle
            **kwargs: Subclass-specific arguments

        Returns:
            Any data structure needed by _get_frame()
        """
        raise NotImplementedError

    def _get_frame(self, index: int, data, f: h5py.File):
        """
        Get frame for a specific patch (implemented by subclass).

        Args:
            index: Patch index
            data: Data prepared by _prepare()
            f: HDF5 file handle

        Returns:
            PIL.Image or None: Frame overlay
        """
        raise NotImplementedError
124
+
125
+
126
class PreviewClustersCommand(BasePreviewCommand):
    """
    Generate thumbnail with cluster visualization

    Usage:
        cmd = PreviewClustersCommand(size=64)
        image = cmd(hdf5_path='data.h5', namespace='default')
    """

    def _prepare(self, f: h5py.File, namespace: str = "default", filter_path: str = ""):
        """
        Prepare cluster frames

        Args:
            f: HDF5 file handle
            namespace: Namespace (e.g., "default", "001+002")
            filter_path: Filter path (e.g., "1+2+3" or "1+2+3/0+1")

        Returns:
            dict with 'clusters' and 'frames'

        Raises:
            RuntimeError: If the cluster dataset does not exist in the file.
        """
        # Parse filter path: "1+2/0+1" -> [[1, 2], [0, 1]]; None means unfiltered
        filters = None
        if filter_path:
            filters = [[int(x) for x in part.split("+")] for part in filter_path.split("/")]

        cluster_path = build_cluster_path(self.model_name, namespace, filters)
        if cluster_path not in f:
            raise RuntimeError(f"{cluster_path} does not exist in HDF5 file")

        clusters = f[cluster_path][:]

        # One labeled frame per cluster id; -1 marks unclustered patches.
        # Union with {-1} (instead of appending) avoids building the -1 frame
        # twice when -1 already appears in the cluster data.
        font = ImageFont.truetype(font=get_platform_font(), size=self.font_size)
        frames = {}
        for cluster in sorted(set(np.unique(clusters).tolist()) | {-1}):
            # Unclustered patches get a dark neutral frame
            color = mcolors.rgb2hex(_get_cluster_color(cluster)[:3]) if cluster >= 0 else "#111"
            frames[cluster] = create_frame(self.size, color, f"{cluster}", font)

        return {"clusters": clusters, "frames": frames}

    def _get_frame(self, index: int, data, f: h5py.File):
        """Return the frame for the cluster at index, or None when unclustered (-1)."""
        cluster = data["clusters"][index]
        return data["frames"][cluster] if cluster >= 0 else None
182
+
183
+
184
class PreviewScoresCommand(BasePreviewCommand):
    """
    Generate thumbnail with PCA visualization

    Usage:
        cmd = PreviewScoresCommand(size=64)
        image = cmd(hdf5_path='data.h5', score_name='pca1', namespace='default')
    """

    def _prepare(
        self,
        f: h5py.File,
        score_name: str,
        namespace: str = "default",
        filter_path: str = "",
        cmap_name: str = "viridis",
        invert: bool = False,
    ):
        """
        Prepare PCA visualization data.

        Args:
            f: HDF5 file handle
            score_name: Score dataset name (e.g., 'pca1', 'pca2')
            namespace: Namespace (e.g., "default", "001+002")
            filter_path: Filter path (e.g., "1+2+3" or "1+2+3/0+1")
            cmap_name: Colormap name
            invert: Invert scores (1 - score)

        Returns:
            dict with 'scores', 'cmap', and 'font'

        Raises:
            RuntimeError: If the score dataset does not exist in the file.
        """
        # "1+2/0+1" -> [[1, 2], [0, 1]]; None when no filtering was requested
        filters = None
        if filter_path:
            filters = [[int(token) for token in segment.split("+")] for segment in filter_path.split("/")]

        # Resolve the hierarchical dataset location for this score
        score_path = build_cluster_path(self.model_name, namespace, filters, dataset=score_name)
        if score_path not in f:
            raise RuntimeError(f"{score_path} does not exist in HDF5 file")

        scores = f[score_path][:]

        # Multi-component scores: keep only the first component
        if scores.ndim > 1:
            scores = scores[:, 0]

        # Optionally flip the scale (1 - score)
        if invert:
            scores = 1 - scores

        return {
            "scores": scores,
            "cmap": plt.get_cmap(cmap_name),
            "font": ImageFont.truetype(font=get_platform_font(), size=self.font_size),
        }

    def _get_frame(self, index: int, data, f: h5py.File):
        """Build a colored, value-labeled frame for the score at index (None for NaN)."""
        score = data["scores"][index]
        if np.isnan(score):
            return None

        hex_color = mcolors.rgb2hex(data["cmap"](score)[:3])
        return create_frame(self.size, hex_color, f"{score:.3f}", data["font"])
256
+
257
+
258
class PreviewLatentPCACommand(BasePreviewCommand):
    """
    Generate thumbnail with latent PCA visualization

    Usage:
        cmd = PreviewLatentPCACommand(size=64)
        image = cmd(hdf5_path='data.h5', alpha=0.5)
    """

    def _prepare(self, f: h5py.File, alpha: float = 0.5):
        """
        Prepare latent PCA visualization data

        Args:
            f: HDF5 file handle
            alpha: Transparency of overlay (0.0-1.0)

        Returns:
            dict with 'overlays', 'alpha_mask', and 'latent_size'

        Raises:
            ValueError: If the latent token count is not a perfect square.
        """
        # Lazy import: sklearn is slow to load (~600ms), defer until needed
        from sklearn.decomposition import PCA  # noqa: PLC0415
        from sklearn.preprocessing import MinMaxScaler  # noqa: PLC0415

        # Load latent features
        h = f[f"{self.model_name}/latent_features"][()]  # B, L(16x16), EMB(1024)
        h = h.astype(np.float32)
        s = h.shape

        # Estimate original latent grid edge (l = sqrt(L)); the token count must
        # be a perfect square so tokens can be laid out on an l x l grid.
        latent_size = int(np.sqrt(s[1]))
        # Explicit raise instead of `assert`: still validated under `python -O`
        if latent_size**2 != s[1]:
            raise ValueError(f"latent token count {s[1]} is not a perfect square")
        if self.size % latent_size != 0:
            print(f"WARNING: {self.size} is not divisible by {latent_size}")

        # Apply PCA jointly across all tokens of all patches
        pca = PCA(n_components=3)
        latent_pca = pca.fit_transform(h.reshape(s[0] * s[1], s[-1]))  # B*L, 3

        # Normalize to [0, 1]
        scaler = MinMaxScaler()
        latent_pca = scaler.fit_transform(latent_pca)

        # Reshape back to per-patch grids and convert to RGB bytes
        latent_pca = latent_pca.reshape(s[0], latent_size, latent_size, 3)
        overlays = (latent_pca * 255).astype(np.uint8)  # B, l, l, 3

        # Uniform alpha mask shared by every overlay
        alpha_mask = Image.new("L", (self.size, self.size), int(alpha * 255))

        return {"overlays": overlays, "alpha_mask": alpha_mask, "latent_size": latent_size}

    def _get_frame(self, index: int, data, f: h5py.File):
        """
        Get latent PCA overlay as a frame for patch at index

        Args:
            index: Patch index
            data: Data prepared by _prepare()
            f: HDF5 file handle

        Returns:
            PIL.Image: RGBA overlay image
        """
        # Get overlay for this patch
        overlay = Image.fromarray(data["overlays"][index]).convert("RGBA")
        # NEAREST keeps the latent grid cells crisp instead of blurring them
        overlay = overlay.resize((self.size, self.size), Image.NEAREST)

        # Apply alpha mask to make it a translucent overlay
        overlay.putalpha(data["alpha_mask"])

        return overlay
331
+
332
+
333
class PreviewLatentClusterCommand(BasePreviewCommand):
    """
    Generate thumbnail with latent cluster visualization

    Usage:
        cmd = PreviewLatentClusterCommand(size=64)
        image = cmd(hdf5_path='data.h5', alpha=0.5)
    """

    def _prepare(self, f: h5py.File, alpha: float = 0.5):
        """
        Prepare latent cluster visualization data.

        Args:
            f: HDF5 file handle
            alpha: Transparency of overlay (0.0-1.0)

        Returns:
            dict with 'overlays', 'alpha_mask', and 'latent_size'
        """
        # Per-token cluster assignments, shape (B, L) with L = l * l tokens
        clusters = f[f"{self.model_name}/latent_clusters"][()]
        shape = clusters.shape

        # The token count must be a perfect square to form an l x l grid
        latent_size = int(np.sqrt(shape[1]))
        assert latent_size**2 == shape[1]
        if self.size % latent_size != 0:
            print(f"WARNING: {self.size} is not divisible by {latent_size}")

        # Map cluster ids to RGBA colors via the tab20 palette, then lay the
        # colored tokens out on their latent grid as byte images
        colored = plt.get_cmap("tab20")(clusters)
        colored = colored.reshape(shape[0], latent_size, latent_size, 4)
        overlays = (colored * 255).astype(np.uint8)  # B, l, l, 4

        # Uniform transparency shared by every overlay
        alpha_mask = Image.new("L", (self.size, self.size), int(alpha * 255))

        return {"overlays": overlays, "alpha_mask": alpha_mask, "latent_size": latent_size}

    def _get_frame(self, index: int, data, f: h5py.File):
        """
        Get latent cluster overlay as a frame for patch at index.

        Args:
            index: Patch index
            data: Data prepared by _prepare()
            f: HDF5 file handle

        Returns:
            PIL.Image: RGBA overlay image
        """
        # NEAREST resize keeps each latent cell a solid block of color
        tile = Image.fromarray(data["overlays"][index]).convert("RGBA")
        tile = tile.resize((self.size, self.size), Image.NEAREST)
        tile.putalpha(data["alpha_mask"])
        return tile
@@ -0,0 +1,171 @@
1
+ """
2
+ Show HDF5 file structure command
3
+ """
4
+
5
+ import h5py
6
+ from pydantic import BaseModel
7
+
8
+ from ..utils.hdf5_paths import list_namespaces
9
+
10
+
11
class ShowResult(BaseModel):
    """Result of show command"""

    # Number of patches stored in the file (None when metadata group is absent)
    patch_count: int | None = None
    # Patch edge length in pixels (None when metadata group is absent)
    patch_size: int | None = None
    # Model group names found at the file root (e.g. "uni", "gigapath", "virchow2");
    # pydantic copies mutable defaults per instance, so `[]`/`{}` here are safe
    models: list[str] = []
    # Per-model list of clustering namespaces discovered in the file
    namespaces: dict[str, list[str]] = {}
18
+
19
+
20
class ShowCommand:
    """
    Show HDF5 file structure and contents

    Usage:
        cmd = ShowCommand(verbose=True)
        result = cmd("data.h5")
    """

    def __init__(self, verbose: bool = False):
        # verbose: additionally print the individual cluster ids per namespace
        self.verbose = verbose

    def __call__(self, hdf5_path: str) -> ShowResult:
        """
        Show HDF5 file structure

        Args:
            hdf5_path: Path to HDF5 file

        Returns:
            ShowResult: Structure information

        Side effects:
            Prints a human-readable report to stdout while populating the result.
        """
        result = ShowResult()

        with h5py.File(hdf5_path, "r") as f:
            self._print_header(hdf5_path)
            self._print_basic_info(f, result)
            self._print_models(f, result)
            # _print_namespaces/_print_scores rely on result.models being
            # populated by _print_models above — the call order matters.
            self._print_namespaces(f, result)
            self._print_scores(f, result)
            self._print_footer()

        return result

    def _print_header(self, path: str):
        # Banner with the file path between '=' rules
        print(f"\n{'=' * 60}")
        print(f"HDF5 File: {path}")
        print(f"{'=' * 60}\n")

    def _print_footer(self):
        # Closing rule matching the header banner
        print(f"{'=' * 60}\n")

    def _print_basic_info(self, f: h5py.File, result: ShowResult):
        # Fill in patch metadata when present; otherwise the report shows "None"
        if "metadata/patch_count" in f:
            result.patch_count = int(f["metadata/patch_count"][()])
            result.patch_size = int(f["metadata/patch_size"][()])

        print("Basic Info:")
        print(f"  Patch Count: {result.patch_count}")
        print(f"  Patch Size: {result.patch_size}px")
        print(f"  Grid: {f['metadata/cols'][()]} x {f['metadata/rows'][()]} (cols x rows)")
        if "metadata/mpp" in f:
            mpp = f["metadata/mpp"][()]
            # NOTE(review): the "(estimated)" suffix is shown whenever mpp > 0,
            # i.e. for every positive value — verify whether the intended
            # condition was a sentinel (e.g. mpp < 0) or a separate flag.
            print(f"  MPP: {mpp:.4f}" + (" (estimated)" if mpp > 0 else ""))
        print()

    def _print_models(self, f: h5py.File, result: ShowResult):
        # Only known model group names are reported; anything else at the root
        # (e.g. "metadata", "patches") is ignored
        available_models = [k for k in f.keys() if k in ["uni", "gigapath", "virchow2"]]
        result.models = available_models

        if available_models:
            print("Available Models:")
            for model in available_models:
                has_features = f"{model}/features" in f
                has_latent = f"{model}/latent_features" in f
                # "x features" marks a model group with no features dataset
                feat_str = "features" if has_features else "x features"
                latent_str = ", latent" if has_latent else ""

                if has_features:
                    feat_shape = f[f"{model}/features"].shape
                    feat_str += f" {feat_shape}"

                print(f"  {model:12s} {feat_str}{latent_str}")
            print()

    def _print_namespaces(self, f: h5py.File, result: ShowResult):
        # Depends on result.models having been filled by _print_models
        available_models = result.models

        for model in available_models:
            namespaces = list_namespaces(f, model)
            if not namespaces:
                continue

            result.namespaces[model] = namespaces

            print(f"{model.upper()} Namespaces:")
            for ns in namespaces:
                cluster_path = f"{model}/{ns}/clusters"
                if cluster_path in f:
                    clusters = f[cluster_path][:]
                    # Negative ids mean "unclustered"; exclude them from the id list
                    unique_clusters = [c for c in sorted(set(clusters)) if c >= 0]
                    n_clustered = sum(clusters >= 0)
                    n_total = len(clusters)

                    umap_path = f"{model}/{ns}/umap"
                    # "o"/"x" as a compact present/absent marker
                    has_umap = "o" if umap_path in f else "x"

                    # NOTE(review): this conditional is a no-op — both branches
                    # yield ns; was a different display name intended here?
                    ns_display = "default" if ns == "default" else ns
                    print(f"  {ns_display}/")
                    print(f"    clusters: {len(unique_clusters)} clusters, {n_clustered}/{n_total} patches")
                    if self.verbose:
                        # Show at most the first 10 cluster ids
                        cluster_list = ", ".join(map(str, unique_clusters[:10]))
                        if len(unique_clusters) > 10:
                            cluster_list += f", ... ({len(unique_clusters)} total)"
                        print(f"      [{cluster_list}]")
                    print(f"    umap: {has_umap}")

                    # Check filters (nested filter groups below this namespace)
                    filter_base = f"{model}/{ns}/filter"
                    if filter_base in f:
                        filters = self._list_filters_recursive(f, filter_base)
                        if filters:
                            print("    filters:")
                            for filter_path in sorted(filters):
                                full_path = f"{filter_base}/{filter_path}/clusters"
                                if full_path in f:
                                    fclusters = f[full_path][:]
                                    funique = [c for c in sorted(set(fclusters)) if c >= 0]
                                    fn_clustered = sum(fclusters >= 0)
                                    print(f"      {filter_path}/ -> {len(funique)} clusters, {fn_clustered} patches")
            print()

    def _print_scores(self, f: h5py.File, result: ShowResult):
        for model in result.models:
            # Score datasets are stored as "scores_<name>" inside the model group
            score_datasets = [k for k in f.get(model, {}).keys() if k.startswith("scores_")]
            if score_datasets:
                print(f"{model.upper()} Scores:")
                for score in score_datasets:
                    # NOTE(review): replace() removes every occurrence of
                    # "scores_", not just the prefix — removeprefix() would be
                    # safer if a score name could contain that substring.
                    score_name = score.replace("scores_", "")
                    print(f"  {score_name}")
                print()

    def _list_filters_recursive(self, f: h5py.File, base_path: str, prefix: str = "") -> list[str]:
        """Recursively list all filter paths"""
        # Collects relative paths like "1+2" and "1+2/filter/0+1" for every
        # filter group that contains a "clusters" dataset.
        filters = []
        if base_path not in f:
            return filters

        for key in f[base_path].keys():
            current_path = f"{prefix}{key}"
            item_path = f"{base_path}/{key}"

            if isinstance(f[item_path], h5py.Group):
                # Only groups that actually carry cluster results are listed
                if "clusters" in f[item_path]:
                    filters.append(current_path)

                # Descend into nested "filter" groups, extending the prefix
                nested_base = f"{item_path}/filter"
                if nested_base in f:
                    nested = self._list_filters_recursive(f, nested_base, f"{current_path}/filter/")
                    filters.extend(nested)

        return filters
+ return filters