spacr 0.4.15__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +2 -2
- spacr/core.py +52 -10
- spacr/deep_spacr.py +2 -3
- spacr/gui.py +0 -1
- spacr/gui_core.py +247 -41
- spacr/gui_elements.py +133 -2
- spacr/gui_utils.py +22 -17
- spacr/io.py +624 -149
- spacr/ml.py +141 -258
- spacr/plot.py +76 -34
- spacr/resources/MEDIAR/__pycache__/SetupDict.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/__pycache__/evaluate.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/__pycache__/generate_mapping.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/__pycache__/main.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/Baseline/__pycache__/Predictor.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/Baseline/__pycache__/Trainer.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/Baseline/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/Baseline/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/EnsemblePredictor.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/Predictor.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/Trainer.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/__pycache__/BasePredictor.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/__pycache__/BaseTrainer.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/__pycache__/measures.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/datasetter.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/transforms.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/CellAware.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/LoadImage.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/NormalizeImage.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/models/__pycache__/MEDIARFormer.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/models/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/sequencing.py +73 -38
- spacr/settings.py +161 -135
- spacr/submodules.py +618 -215
- spacr/timelapse.py +197 -29
- spacr/toxo.py +23 -23
- spacr/utils.py +186 -128
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/METADATA +5 -2
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/RECORD +53 -24
- spacr/stats.py +0 -221
- /spacr/{cellpose.py → spacr_cellpose.py} +0 -0
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/LICENSE +0 -0
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/WHEEL +0 -0
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/entry_points.txt +0 -0
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/top_level.txt +0 -0
spacr/io.py
CHANGED
@@ -1,4 +1,4 @@
-import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob, queue, tifffile, czifile, atexit, datetime
+import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob, queue, tifffile, czifile, atexit, datetime, traceback
 import numpy as np
 import pandas as pd
 from PIL import Image, ImageOps
@@ -23,6 +23,8 @@ import seaborn as sns
 from nd2reader import ND2Reader
 from torchvision import transforms
 from sklearn.model_selection import train_test_split
+import readlif
+from pylibCZIrw import czi as pyczi
 
 def process_non_tif_non_2D_images(folder):
     """Processes all images in the folder and splits them into grayscale channels, preserving bit depth."""
@@ -131,58 +133,61 @@ def process_non_tif_non_2D_images(folder):
 
 def _load_images_and_labels(image_files, label_files, invert=False):
 
-    from .utils import invert_image
+    from .utils import invert_image
 
     images = []
     labels = []
-
-    if not image_files is None:
-        image_names = sorted([os.path.basename(f) for f in image_files])
-    else:
-        image_names = []
-
-    if not label_files is None:
-        label_names = sorted([os.path.basename(f) for f in label_files])
-    else:
-        label_names = []
 
-
+    image_names = sorted([os.path.basename(f) for f in image_files]) if image_files else []
+    label_names = sorted([os.path.basename(f) for f in label_files]) if label_files else []
+
+    if image_files and label_files:
         for img_file, lbl_file in zip(image_files, label_files):
            image = cellpose.io.imread(img_file)
+            if image is None:
+                print(f"WARNING: Could not load image: {img_file}")
+                continue
            if invert:
                image = invert_image(image)
-            label = cellpose.io.imread(lbl_file)
            if image.max() > 1:
                image = image / image.max()
+
+            label = cellpose.io.imread(lbl_file)
+            if label is None:
+                print(f"WARNING: Could not load label: {lbl_file}")
+                continue
+
            images.append(image)
            labels.append(label)
-
+
+    elif image_files:
         for img_file in image_files:
            image = cellpose.io.imread(img_file)
+            if image is None:
+                print(f"WARNING: Could not load image: {img_file}")
+                continue
            if invert:
                image = invert_image(image)
            if image.max() > 1:
                image = image / image.max()
            images.append(image)
-
-
-
+
+    elif label_files:
+        for lbl_file in label_files:
+            label = cellpose.io.imread(lbl_file)
+            if label is None:
+                print(f"WARNING: Could not load label: {lbl_file}")
+                continue
            labels.append(label)
-
-
-
-
-    image_dir = None
-
-    if not label_files is None:
-        label_dir = os.path.dirname(label_files[0])
-    else:
-        label_dir = None
-
-    # Log the number of loaded images and labels
+
+    image_dir = os.path.dirname(image_files[0]) if image_files else None
+    label_dir = os.path.dirname(label_files[0]) if label_files else None
+
     print(f'Loaded {len(images)} images and {len(labels)} labels from {image_dir} and {label_dir}')
-    if
-    print(f'image shape: {images[0].shape}, image type: images[0].
+    if images and labels:
+        print(f'image shape: {images[0].shape}, image type: {images[0].dtype}; '
+              f'label shape: {labels[0].shape}, label type: {labels[0].dtype}')
+
     return images, labels, image_names, label_names
 
 def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None,
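
Note: the reworked loader above guards every cellpose.io.imread call so that unreadable files are skipped rather than aborting the batch. A minimal sketch of the same guard pattern, assuming hypothetical img.tif/lbl.tif paths (not part of the diff):

from cellpose import io as cp_io

def load_pair(img_file, lbl_file):
    # cellpose.io.imread yields None for files it cannot read; skip the pair in that case
    image, label = cp_io.imread(img_file), cp_io.imread(lbl_file)
    if image is None or label is None:
        print(f"WARNING: Could not load pair: {img_file}, {lbl_file}")
        return None
    if image.max() > 1:
        image = image / image.max()  # rescale intensities to [0, 1], as in the diff above
    return image, label

pair = load_pair("img.tif", "lbl.tif")  # hypothetical paths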
@@ -647,8 +652,8 @@ def load_images_from_paths(images_by_key):
 
     return images_dict
 
-#@log_function_call
-def _rename_and_organize_image_files(src, regex, batch_size=100,
+#@log_function_call
+def _rename_and_organize_image_files(src, regex, batch_size=100, metadata_type='', img_format='.tif', timelapse=False):
     """
     Convert z-stack images to maximum intensity projection (MIP) images.
 
@@ -656,24 +661,26 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
         src (str): The source directory containing the z-stack images.
         regex (str): The regular expression pattern used to match the filenames of the z-stack images.
         batch_size (int, optional): The number of images to process in each batch. Defaults to 100.
-        pick_slice (bool, optional): Whether to pick a specific slice based on the provided skip mode. Defaults to False.
-        skip_mode (str, optional): The skip mode used to filter out specific slices. Defaults to '01'.
         metadata_type (str, optional): The type of metadata associated with the images. Defaults to ''.
 
     Returns:
         None
     """
 
+    if isinstance(img_format, str):
+        img_format = [img_format]
+
     from .utils import _extract_filename_metadata, print_progress
 
     regular_expression = re.compile(regex)
     stack_path = os.path.join(src, 'stack')
     files_processed = 0
     if not os.path.exists(stack_path) or (os.path.isdir(stack_path) and len(os.listdir(stack_path)) == 0):
-        all_filenames = [filename for filename in os.listdir(src) if filename.endswith(img_format)]
+        all_filenames = [filename for filename in os.listdir(src) if any(filename.endswith(ext) for ext in img_format)]
         print(f'All files: {len(all_filenames)} in {src}')
+        all_filenames = [f for f in all_filenames if not f.startswith('.')] #Exclude hidden files
         time_ls = []
-        image_paths_by_key = _extract_filename_metadata(all_filenames, src, regular_expression, metadata_type
+        image_paths_by_key = _extract_filename_metadata(all_filenames, src, regular_expression, metadata_type)
         # Convert dictionary keys to a list for batching
         batching_keys = list(image_paths_by_key.keys())
         print(f'All unique FOV: {len(image_paths_by_key)} in {src}')
@@ -684,56 +691,43 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
             batch_keys = batching_keys[idx:idx+batch_size]
             batch_images_by_key = {key: image_paths_by_key[key] for key in batch_keys}
             images_by_key = load_images_from_paths(batch_images_by_key)
-
-
-
-
-
-
-
-                    os.makedirs(output_dir, exist_ok=True)
-                    output_filename = f'{plate}_{well}_{field}.tif'
-                    output_path = os.path.join(output_dir, output_filename)
-                    files_processed += 1
-                    stop = time.time()
-                    duration = stop - start
-                    time_ls.append(duration)
-                    files_to_process = len(all_filenames)
-                    print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=batch_size, operation_type='Preprocessing filenames')
-
-                    if not os.path.exists(output_path):
-                        mip_image.save(output_path)
-                    else:
-                        print(f'WARNING: A file with the same name already exists at location {output_filename}')
-            else:
-                for i, (key, images) in enumerate(images_by_key.items()):
-                    plate, well, field, channel = key[:4]
-                    output_dir = os.path.join(src, channel)
-                    mip = np.max(np.stack(images), axis=0)
-                    mip_image = Image.fromarray(mip)
-                    os.makedirs(output_dir, exist_ok=True)
+
+            # Process each batch of images
+            for i, (key, images) in enumerate(images_by_key.items()):
+
+                plate, well, field, channel, timeID, sliceID = key
+
+                if timelapse:
                     output_filename = f'{plate}_{well}_{field}.tif'
-
-
-
-
-
-
-
-
-
-
-
-
+                else:
+                    output_filename = f'{plate}_{well}_{field}_{timeID}.tif'
+
+                output_dir = os.path.join(src, channel)
+                os.makedirs(output_dir, exist_ok=True)
+                output_path = os.path.join(output_dir, output_filename)
+                mip = np.max(np.stack(images), axis=0)
+                mip_image = Image.fromarray(mip)
+
+                files_processed += 1
+                stop = time.time()
+                duration = stop - start
+                time_ls.append(duration)
+                files_to_process = len(all_filenames)
+                print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=batch_size, operation_type='Preprocessing filenames')
+
+                if not os.path.exists(output_path):
+                    mip_image.save(output_path)
+                else:
+                    print(f'WARNING: A file with the same name already exists at location {output_filename}')
 
             images_by_key.clear()
 
         # Move original images to a new directory
-        valid_exts = [img_format]
         newpath = os.path.join(src, 'orig')
         os.makedirs(newpath, exist_ok=True)
         for filename in os.listdir(src):
-
+            #print(f"{filename}: {os.path.splitext(filename)[1]}")
+            if os.path.splitext(filename)[1] in img_format:
                 move = os.path.join(newpath, filename)
                 if os.path.exists(move):
                     print(f'WARNING: A file with the same name already exists at location {move}')
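
Note: the rewritten loop above derives one output name per (plate, well, field, time) key and writes a per-channel maximum-intensity projection. A minimal sketch of that MIP-and-save step, with hypothetical names:

import numpy as np
from PIL import Image

def save_mip(images, output_path):
    # collapse the z-slices gathered for one FOV/channel into a single MIP and save it
    mip = np.max(np.stack(images), axis=0)
    Image.fromarray(mip).save(output_path)

# save_mip(list_of_2d_arrays, "plate1_A01_1.tif")  # output names follow {plate}_{well}_{field}.tif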
@@ -1236,7 +1230,11 @@ def concatenate_and_normalize(src, channels, save_dtype=np.float32, settings={})
     files_processed = 0
     for i, path in enumerate(paths):
         start = time.time()
-
+        try:
+            array = np.load(path)
+        except Exception as e:
+            print(f"Error loading file {path}: {e}")
+            continue
         stack_ls.append(array)
         filenames_batch.append(os.path.basename(path))
         stop = time.time()
@@ -1564,30 +1562,34 @@ def preprocess_img_data(settings):
         save_dtype (type, optional): The data type used for saving the preprocessed images. Defaults to np.float32.
         randomize (bool, optional): Whether to randomize the order of the images. Defaults to True.
         all_to_mip (bool, optional): Whether to convert all images to MIP. Defaults to False.
-        pick_slice (bool, optional): Whether to pick a specific slice based on the provided skip mode. Defaults to False.
-        skip_mode (str, optional): The skip mode used to filter out specific slices. Defaults to '01'.
         settings (dict, optional): Additional settings for preprocessing. Defaults to {}.
 
     Returns:
         None
     """
-
+
     src = settings['src']
-
+    delete_empty_subdirectories(src)
     files = os.listdir(src)
-    extensions = [file.split('.')[-1] for file in files]
-    extension_counts = Counter(extensions)
-    most_common_extension = extension_counts.most_common(1)[0][0]
-    img_format = None
 
-
+    valid_ext = ['tif', 'tiff', 'png', 'jpg', 'jpeg', 'bmp', 'nd2', 'czi', 'lif']
+    extensions = [file.split('.')[-1].lower() for file in files]
+    # Filter only valid extensions
+    valid_extensions = [ext for ext in extensions if ext in valid_ext]
 
-    #
-
-
-
+    # Determine most common valid extension
+    img_format = None
+    if valid_extensions:
+        extension_counts = Counter(valid_extensions)
+        most_common_extension = Counter(valid_extensions).most_common(1)[0][0]
+        img_format = most_common_extension
+
+        print(f"Found {extension_counts[most_common_extension]} {most_common_extension} files")
+
     else:
-        print(f
+        print(f"Could not find any {valid_ext} files in {src} only found {extension_counts[0]}")
+        print(f"{files} in {src}")
+        print(f"Please check the folder and try again")
 
     if os.path.exists(os.path.join(src,'stack')):
         print('Found existing stack folder.')
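
Note: preprocess_img_data now restricts format detection to a whitelist of extensions and picks the most common one with collections.Counter. A small, self-contained illustration of that detection logic (the file list is hypothetical):

from collections import Counter

files = ["a.tif", "b.TIF", "c.nd2", "notes.txt"]  # hypothetical folder listing
valid_ext = ['tif', 'tiff', 'png', 'jpg', 'jpeg', 'bmp', 'nd2', 'czi', 'lif']
extensions = [f.split('.')[-1].lower() for f in files]
valid_extensions = [ext for ext in extensions if ext in valid_ext]
img_format = Counter(valid_extensions).most_common(1)[0][0] if valid_extensions else None
print(img_format)  # -> 'tif'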
@@ -1598,23 +1600,24 @@ def preprocess_img_data(settings):
         return settings, src
 
     mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
-    backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]
 
-    settings
+    settings = set_default_settings_preprocess_img_data(settings)
 
-    regex = _get_regex(metadata_type, img_format, custom_regex)
-
-    if test_mode:
+    regex = _get_regex(settings['metadata_type'], img_format, settings['custom_regex'])
+
+    if settings['test_mode']:
 
-        print(f
+        print(f"Running spacr in test mode")
         settings['plot'] = True
         try:
             os.rmdir(os.path.join(src, 'test'))
             print(f"Deleted test directory: {os.path.join(src, 'test')}")
         except OSError as e:
+            print(f"Error deleting test directory: {e}")
+            print(f"Delete manually before running test mode")
             pass
 
-        src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
+        src = _run_test_mode(settings['src'], regex, settings['timelapse'], settings['test_images'], settings['random_test'])
         settings['src'] = src
 
     stack_path = os.path.join(src, 'stack')
@@ -1625,46 +1628,45 @@ def preprocess_img_data(settings):
     if not os.path.exists(stack_path):
         try:
             if not img_format == None:
-
-
-
-
-
-
-
-
-
-
-                #
-                if
-
-
-
-
-
-                    print(f"all images: {all_imgs}, full batch: {full_batches}, last batch: {last_batch_size}")
-                    raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
+                img_format = ['.tif', '.tiff', '.png', '.jpg', '.jpeg', '.bmp', '.nd2', '.czi', '.lif']
+                _rename_and_organize_image_files(src, regex, settings['batch_size'], settings['metadata_type'], img_format)
+
+                #Make sure no batches will be of only one image
+                all_imgs = len(stack_path)
+                full_batches = all_imgs // settings['batch_size']
+                last_batch_size = all_imgs % settings['batch_size']
+
+                # Check if the last batch is of size 1
+                if last_batch_size == 1:
+                    # If there's only one batch and its size is 1, it's also an issue
+                    if full_batches == 0:
+                        raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
+                    # If the last batch is of size 1, merge it with the second last batch
+                    elif full_batches > 0:
+                        print(f"all images: {all_imgs}, full batch: {full_batches}, last batch: {last_batch_size}")
+                        raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
 
             nr_channel_folders = _merge_channels(src, plot=False)
 
             if len(settings['channels']) != nr_channel_folders:
                 print(f"Number of channels does not match number of channel folders. channels: {settings['channels']} channel folders: {nr_channel_folders}")
                 new_channels = list(range(nr_channel_folders))
-                print(f"
+                print(f"Changing channels from {settings['channels']} to {new_channels}")
                 settings['channels'] = new_channels
 
-            if timelapse:
-                _create_movies_from_npy_per_channel(stack_path, fps=
+            if settings['timelapse']:
+                _create_movies_from_npy_per_channel(stack_path, fps=settings['fps'])
 
-            if plot:
-                print(f
-                plot_arrays(stack_path, figuresize, cmap, nr=nr, normalize=normalize)
+            if settings['plot']:
+                print(f"plotting {settings['nr']} images from {src}/stack")
+                plot_arrays(stack_path, settings['figuresize'], settings['cmap'], nr=settings['nr'], normalize=settings['normalize'])
 
-            if all_to_mip:
+            if settings['all_to_mip']:
                 _mip_all(stack_path)
-                if plot:
-                    print(f
-                    plot_arrays(stack_path, figuresize, cmap, nr=nr, normalize=normalize)
+                if settings['plot']:
+                    print(f"plotting {settings['nr']} images from {src}/stack")
+                    plot_arrays(stack_path, settings['figuresize'], settings['cmap'], nr=settings['nr'], normalize=settings['normalize'])
+
         except Exception as e:
             print(f"Error: {e}")
 
@@ -1673,9 +1675,6 @@ def preprocess_img_data(settings):
                               save_dtype=np.float32,
                               settings=settings)
 
-    #if plot:
-    #    _plot_4D_arrays(src+'/norm_channel_stack', nr_npz=1, nr=nr)
-
     return settings, src
 
 def _check_masks(batch, batch_filenames, output_folder):
@@ -1780,11 +1779,11 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
         print(e)
     conn.close()
     if 'png_list' in dataframes:
-        png_list_df = dataframes['png_list'][['cell_id', 'png_path', '
+        png_list_df = dataframes['png_list'][['cell_id', 'png_path', 'plateID', 'rowID', 'columnID', 'fieldID']].copy()
         png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
         png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
         if 'cell' in dataframes:
-            join_cols = ['object_label', '
+            join_cols = ['object_label', 'plateID', 'rowID', 'columnID','fieldID']
             dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
         else:
             print("Cell table not found in database tables.")
@@ -2085,14 +2084,18 @@ def _read_db(db_loc, tables):
     Returns:
     - dfs (list): A list of pandas DataFrames, each containing the data from a table.
     """
-    from .utils import rename_columns_in_db
+    from .utils import rename_columns_in_db, correct_metadata
+
     rename_columns_in_db(db_loc)
     conn = sqlite3.connect(db_loc)
     dfs = []
+
     for table in tables:
         query = f'SELECT * FROM {table}'
         df = pd.read_sql_query(query, conn)
+        df = correct_metadata(df)
         dfs.append(df)
+
     conn.close()
     return dfs
 
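
Note: _read_db now routes every table through correct_metadata from spacr.utils before appending it. A compact sketch of the same read loop, with the correction step stubbed out as a pass-through (fix is a placeholder, not the spacr API):

import sqlite3
import pandas as pd

def read_tables(db_loc, tables, fix=lambda df: df):
    # read each table into a DataFrame and apply a metadata-correction hook per table
    conn = sqlite3.connect(db_loc)
    dfs = [fix(pd.read_sql_query(f'SELECT * FROM {t}', conn)) for t in tables]
    conn.close()
    return dfs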
@@ -2271,7 +2274,7 @@ def _copy_missclassified(df):
 
 def _read_db(db_loc, tables):
 
-    from .utils import rename_columns_in_db
+    from .utils import rename_columns_in_db, correct_metadata
 
     rename_columns_in_db(db_loc)
     conn = sqlite3.connect(db_loc) # Create a connection to the database
@@ -2279,12 +2282,13 @@ def _read_db(db_loc, tables):
     for table in tables:
         query = f'SELECT * FROM {table}' # Write a SQL query to get the data from the database
         df = pd.read_sql_query(query, conn) # Use the read_sql_query function to get the data and save it as a DataFrame
+        df = correct_metadata(df)
         dfs.append(df)
     conn.close() # Close the connection
     return dfs
 
 def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10, change_plate=False):
-
+
     from .utils import _split_data
 
     # Initialize an empty dictionary to store DataFrames by table name
@@ -2294,8 +2298,8 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     for idx, loc in enumerate(locs):
         db_dfs = _read_db(loc, tables)
         if change_plate:
-            db_dfs['
-            db_dfs['prc'] = db_dfs['
+            db_dfs['plateID'] = f'plate{idx+1}'
+            db_dfs['prc'] = db_dfs['plateID'].astype(str) + '_' + db_dfs['rowID'].astype(str) + '_' + db_dfs['columnID'].astype(str)
         for table, df in zip(tables, db_dfs):
             data_dict[table].append(df)
 
@@ -2303,6 +2307,7 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     for table, dfs in data_dict.items():
         if dfs:
             data_dict[table] = pd.concat(dfs, axis=0)
+
         if verbose:
             print(f"{table}: {len(data_dict[table])}")
 
@@ -2389,18 +2394,18 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     if 'png_list' in data_dict:
         png_list = data_dict['png_list'].copy()
         png_list_g_df_numeric, png_list_g_df_non_numeric = _split_data(png_list, 'prcfo', 'cell_id')
-        png_list_g_df_non_numeric.drop(columns=['
+        png_list_g_df_non_numeric.drop(columns=['plateID','rowID','columnID','fieldID','file_name','cell_id', 'prcf'], inplace=True)
         if verbose:
             print(f'png_list: {len(png_list)}, png_list grouped: {len(png_list_g_df_numeric)}')
             print(f"Added png_list columns: {png_list_g_df_numeric.columns}, {png_list_g_df_non_numeric.columns}")
         merged_df = merged_df.merge(png_list_g_df_numeric, left_index=True, right_index=True)
         merged_df = merged_df.merge(png_list_g_df_non_numeric, left_index=True, right_index=True)
-
+
     # Add prc (plate row column) and prcfo (plate row column field object) columns
-    metadata = metadata.assign(prc=lambda x: x['
+    metadata = metadata.assign(prc=lambda x: x['plateID'] + '_' + x['rowID'] + '_' + x['columnID'])
     cells_well = metadata.groupby('prc')['object_label'].nunique().reset_index(name='cells_per_well')
     metadata = metadata.merge(cells_well, on='prc')
-    metadata = metadata.assign(prcfo=lambda x: x['
+    metadata = metadata.assign(prcfo=lambda x: x['plateID'] + '_' + x['rowID'] + '_' + x['columnID'] + '_' + x['fieldID'] + '_' + x['object_label'])
     metadata.set_index('prcfo', inplace=True)
 
     # Merge metadata with final merged DataFrame
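
Note: the merge step above now builds its prc and prcfo keys from the renamed plateID/rowID/columnID/fieldID columns. A toy example of the key construction (the single-row frame is made up):

import pandas as pd

metadata = pd.DataFrame({'plateID': ['plate1'], 'rowID': ['r1'], 'columnID': ['c1'],
                         'fieldID': ['f1'], 'object_label': ['o1']})  # hypothetical row
metadata = metadata.assign(prc=lambda x: x['plateID'] + '_' + x['rowID'] + '_' + x['columnID'])
metadata = metadata.assign(prcfo=lambda x: x['prc'] + '_' + x['fieldID'] + '_' + x['object_label'])
print(metadata['prcfo'].iloc[0])  # -> plate1_r1_c1_f1_o1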
@@ -2988,7 +2993,7 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann
 
     return class_paths
 
-def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='
+def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='columnID', class_metadata=['c1','c2']):
     all_paths = []
 
     # Connect to the database and retrieve the image paths and annotations
@@ -3010,9 +3015,9 @@ def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='t
 
     # Filter all_paths by metadata_type_by and class_metadata
     filtered_paths = []
-    metadata_index = {'
+    metadata_index = {'rowID': 2, 'columnID': 3}.get(metadata_type_by, None)
     if metadata_index is None:
-        raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be '
+        raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be 'rowID' or 'columnID'. {class_metadata} must be a list formatted as ['c1', 'c2'] or ['r1', 'r2']")
 
     for row in all_paths:
         if row[metadata_index] in class_metadata:
@@ -3102,4 +3107,474 @@ def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
         test_class_dir = os.path.join(dst, f'test/{cls}')
         print(f'Train class {cls}: {len(os.listdir(train_class_dir))}, Test class {cls}: {len(os.listdir(test_class_dir))}')
 
-    return os.path.join(dst, 'train'), os.path.join(dst, 'test')
+    return os.path.join(dst, 'train'), os.path.join(dst, 'test')
+
+def convert_separate_files_to_yokogawa(folder, regex):
+
+    ROWS = "ABCDEFGHIJKLMNOP"
+    COLS = [f"{i:02d}" for i in range(1, 25)]
+    WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+    def _get_next_well(used_wells):
+        plate = 1
+        for well in WELLS:
+            well_name = f"plate{plate}_{well}"
+            if well_name not in used_wells:
+                return well_name
+            if well == "P24":
+                plate += 1
+        return f"plate{plate}_A01"
+
+    pattern = re.compile(regex, re.I)
+
+    files_by_region = {}
+    rename_log = []
+    csv_path = os.path.join(folder, "rename_log.csv")
+    used_wells = set()
+    region_to_well = {}
+
+    # Group files by (plateID, wellID, fieldID, timeID, chanID)
+    for file in os.listdir(folder):
+        match = pattern.match(file)
+        if not match:
+            print(f"Skipping {file}: does not match regex.")
+            continue
+
+        meta = match.groupdict()
+
+        # Mandatory metadata
+        if 'wellID' not in meta or meta['wellID'] is None:
+            print(f"Skipping {file}: missing mandatory wellID.")
+            continue
+        wellID = meta['wellID']
+
+        # Optional metadata with defaults
+        plateID = meta.get('plateID', '1') or '1'
+        fieldID = meta.get('fieldID', '1') or '1'
+        timeID = int(meta.get('timeID', 1) or 1)
+        chanID = int(meta.get('chanID', 1) or 1)
+        sliceID = meta.get('sliceID')
+        sliceID = int(sliceID) if sliceID is not None else None
+
+        region_key = (plateID, wellID, fieldID, timeID, chanID)
+
+        files_by_region.setdefault(region_key, []).append((file, sliceID))
+
+    # Assign wells and process files per region
+    for region, file_list in files_by_region.items():
+        if region[:3] not in region_to_well:
+            next_well = _get_next_well(used_wells)
+            region_to_well[region[:3]] = next_well
+            used_wells.add(next_well)
+
+        assigned_well = region_to_well[region[:3]]
+        plateID, wellID, fieldID, timeID, chanID = region
+
+        # Check if multiple slices exist and are meaningful
+        slice_ids = [sid for _, sid in file_list if sid is not None]
+        unique_slices = set(slice_ids)
+
+        images = []
+        for filename, _ in sorted(file_list, key=lambda x: x[1] or 1):
+            img = tifffile.imread(os.path.join(folder, filename))
+            images.append(img)
+
+        # Perform MIP only if multiple unique slices are present
+        if len(unique_slices) > 1:
+            img_to_save = np.max(np.stack(images), axis=0)
+        else:
+            img_to_save = images[0]
+
+        dtype = img_to_save.dtype
+
+        new_filename = f"{assigned_well}_T{timeID:04d}F{int(fieldID):03d}L01C{chanID:02d}.tif"
+        new_filepath = os.path.join(folder, new_filename)
+        tifffile.imwrite(new_filepath, img_to_save.astype(dtype))
+
+        # Log original filenames involved in MIP or single file rename
+        original_files = ";".join(f[0] for f in file_list)
+        rename_log.append({"Original File(s)": original_files, "Renamed TIFF": new_filename})
+
+    pd.DataFrame(rename_log).to_csv(csv_path, index=False)
+    print(f"Processing complete. Files saved in {folder} and rename log saved as {csv_path}.")
+
+def convert_to_yokogawa(folder):
+    """
+    Detects file type in the folder and converts them
+    to Yokogawa-style naming with Maximum Intensity Projection (MIP).
+    """
+
+    def _get_next_well(used_wells):
+        """
+        Determines the next available well position across multiple 384-well plates.
+        """
+        ROWS = "ABCDEFGHIJKLMNOP"
+        COLS = [f"{i:02d}" for i in range(1, 25)]
+        WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+        plate = 1
+        while True:
+            for well in WELLS:
+                well_name = f"plate{plate}_{well}"
+                if well_name not in used_wells:
+                    used_wells.add(well_name)
+                    return well_name
+            plate += 1  # All wells exhausted in current plate, increment to next plate
+
+
+    # Define 384-well plate format
+    ROWS = "ABCDEFGHIJKLMNOP"
+    COLS = [f"{i:02d}" for i in range(1, 25)]
+    WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+    filenames = []
+    rename_log = []
+    csv_path = os.path.join(folder, "rename_log.csv")
+    used_wells = set()
+
+    # **Dictionary to store well assignments per original file**
+    file_to_well = {}
+
+    for file in os.listdir(folder):
+        path = os.path.join(folder, file)
+        ext = file.lower().split('.')[-1]
+
+        # **Assign a well only once per original file**
+        if file not in file_to_well:
+            file_to_well[file] = _get_next_well(used_wells)
+            #used_wells.add(file_to_well[file]) # Mark it as used
+
+        well = file_to_well[file]  # Use the same well for all channels/times
+
+        ### **Process Nikon ND2 Files**
+        if ext == 'nd2':
+            try:
+                nd2 = ND2Reader(path)
+                metadata = nd2.metadata
+
+                timepoints = list(range(len(metadata.get("frames", [0])))) or [0]
+                fields = list(range(len(metadata.get("fields_of_view", [0])))) or [0]
+                z_levels = list(metadata.get("z_levels", range(1))) if metadata.get("z_levels") else [0]
+                channels = metadata.get("channels", [])
+
+                for t_idx in timepoints:
+                    for f_idx in fields:
+                        for c_idx, channel in enumerate(channels):
+                            try:
+                                mip_image = np.max.reduce([
+                                    nd2.get_frame_2D(t=t_idx, v=f_idx, z=z_idx, c=c_idx)
+                                    for z_idx in z_levels
+                                ], axis=0)
+
+                                dtype = mip_image.dtype
+                                filename = f"{well}_T{t_idx+1:04d}F{f_idx+1:03d}L01C{c_idx+1:02d}.tif"
+                                filepath = os.path.join(folder, filename)
+
+                                tifffile.imwrite(filepath, mip_image.astype(dtype))
+                                rename_log.append({"Original File": file,
+                                                   "Renamed TIFF": filename,
+                                                   "ext": ext,
+                                                   "time": t_idx,
+                                                   "field": f_idx,
+                                                   "channel": channel,
+                                                   "z": z_levels})
+
+                            except IndexError:
+                                print(f"Warning: ND2 file {file} has an incomplete data structure. Skipping.")
+
+            except Exception as e:
+                print(f"Error processing ND2 file {file}: {e}")
+
+        elif ext == 'czi':
+            try:
+                # Open the CZI in streaming mode
+                with pyczi.open_czi(path) as czidoc:
+
+                    # 1) Global dimension ranges
+                    bbox = czidoc.total_bounding_box
+                    _, tlen = bbox.get('T', (0,1))
+                    _, clen = bbox.get('C', (0,1))
+                    _, zlen = bbox.get('Z', (0,1))
+
+                    # 2) Scene → list of scene indices
+                    scenes_bb = czidoc.scenes_bounding_rectangle
+                    scenes = sorted(scenes_bb.keys()) if scenes_bb else [None]
+
+                    # 3) Output folder (same as .czi)
+                    folder = os.path.dirname(path)
+
+                    # 4) Loop scene × time × channel × Z
+                    for scene in scenes:
+                        # *** assign a unique well for this scene ***
+                        scene_well = _get_next_well(used_wells)
+
+                        # Field index = scene+1 (or 1 if no scene)
+                        F_idx = scene + 1 if scene is not None else 1
+                        # Scene index for "A"
+                        A_idx = scene + 1 if scene is not None else 1
+
+                        for t in range(tlen):
+                            for c in range(clen):
+                                for z in range(zlen):
+                                    # Read exactly one 2D plane
+                                    arr = czidoc.read(
+                                        plane={'T': t, 'C': c, 'Z': z},
+                                        scene=scene
+                                    )
+                                    plane = np.squeeze(arr)
+
+                                    # Build Yokogawa-style filename:
+                                    fn = (
+                                        f"{scene_well}_"
+                                        f"T{t+1:04d}"
+                                        f"F{F_idx:03d}"
+                                        f"L01"
+                                        f"A{A_idx:02d}"
+                                        f"Z{z+1:02d}"
+                                        f"C{c+1:02d}.tif"
+                                    )
+                                    outpath = os.path.join(folder, fn)
+
+                                    # Write with lossless compression
+                                    tifffile.imwrite(
+                                        outpath,
+                                        plane.astype(plane.dtype),
+                                        compression='zlib'
+                                    )
+
+                                    # Log it
+                                    rename_log.append({
+                                        "Original File": file,
+                                        "Renamed TIFF": fn,
+                                        "ext": ext,
+                                        "scene": scene,
+                                        "time": t,
+                                        "slice": z,
+                                        "field": F_idx,
+                                        "channel": c,
+                                        "well": scene_well
+                                    })
+
+            except Exception as e:
+                print(f"Error processing CZI file {file}: {e}")
+
+        ### **Process Leica LIF Files**
+        elif ext == 'lif':
+            try:
+                lif_file = readlif.Reader(path)
+
+                for image_idx, image in enumerate(lif_file.getIterImage()):
+                    timepoints = range(getattr(image.dims, 't', 1))
+                    z_levels = range(getattr(image.dims, 'z', 1))
+                    channels = range(getattr(image.dims, 'c', 1))
+
+                    for t_idx in timepoints:
+                        for c_idx in channels:
+                            z_stack = []
+                            for z_idx in z_levels:
+                                try:
+                                    frame = image.getFrame(z=z_idx, t=t_idx, c=c_idx)
+                                    z_stack.append(frame)
+                                except IndexError:
+                                    print(f"Missing frame: T{t_idx}, Z{z_idx}, C{c_idx} in {file}, skipping frame.")
+
+                            if z_stack:
+                                mip_image = np.max(np.stack(z_stack), axis=0)
+                                dtype = mip_image.dtype
+                                filename = f"{well}_T{t_idx+1:04d}F{image_idx+1:03d}L01C{c_idx+1:02d}.tif"
+                                filepath = os.path.join(folder, filename)
+
+                                tifffile.imwrite(filepath, mip_image.astype(dtype))
+                                rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+            except Exception as e:
+                print(f"Error processing LIF file {file}: {e}")
+
+        ### **Process Standard Image Files (TIFF, PNG, JPEG, BMP)**
+        elif ext in ['tif', 'tiff', 'png', 'jpg', 'jpeg', 'bmp'] and not file.startswith("plate"):
+            try:
+                with tifffile.TiffFile(path) as tif:
+                    images = tif.asarray()
+                    ndim = images.ndim
+
+                    # Defaults
+                    t_dim = z_dim = c_dim = 1
+
+                    # Determine dimensions more explicitly
+                    if ndim == 2:
+                        mip_image = images
+                        filename = f"{well}_T0001F001L01C01.tif"
+                        tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                        rename_log.append({"Original File": file, "Renamed TIFF": filename})
+                        continue
+
+                    elif ndim == 3:
+                        if images.shape[0] <= 4:  # Likely channels
+                            c_dim = images.shape[0]
+                            for c in range(c_dim):
+                                mip_image = images[c, :, :]
+                                filename = f"{well}_T0001F001L01C{c+1:02d}.tif"
+                                tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                                rename_log.append({"Original File": file, "Renamed TIFF": filename})
+                        else:  # Z-stack
+                            mip_image = np.max(images, axis=0)
+                            filename = f"{well}_T0001F001L01C01.tif"
+                            tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                            rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+                    elif ndim == 4:
+                        t_dim, z_dim, y_dim, x_dim = images.shape
+                        for t in range(t_dim):
+                            mip_image = np.max(images[t, :, :, :], axis=0)
+                            filename = f"{well}_T{t+1:04d}F001L01C01.tif"
+                            tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                            rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+                    else:
+                        raise ValueError(f"Unsupported TIFF dimensions: {images.shape}")
+
+            except Exception as e:
+                print(f"Error processing standard image file {file}: {e}")
+
+    # Save rename log as CSV
+    pd.DataFrame(rename_log).to_csv(csv_path, index=False)
+    print(f"Processing complete. Files saved in {folder} and rename log saved as {csv_path}.")
+
+def apply_augmentation(image, method):
+    if method == 'rotate90':
+        return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
+    elif method == 'rotate180':
+        return cv2.rotate(image, cv2.ROTATE_180)
+    elif method == 'rotate270':
+        return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
+    elif method == 'flip_h':
+        return cv2.flip(image, 1)
+    elif method == 'flip_v':
+        return cv2.flip(image, 0)
+    return image
+
+def process_instruction(entry):
+    img = tifffile.imread(entry["src_img"])
+    msk = tifffile.imread(entry["src_msk"])
+    if entry["augment"]:
+        img = apply_augmentation(img, entry["augment"])
+        msk = apply_augmentation(msk, entry["augment"])
+    tifffile.imwrite(entry["dst_img"], img)
+    tifffile.imwrite(entry["dst_msk"], msk)
+    return 1
+
+def prepare_cellpose_dataset(input_root, augment_data=False, train_fraction=0.8, n_jobs=None):
+
+    from .utils import print_progress
+
+    time_ls = []
+    input_root = os.path.abspath(input_root)
+    output_root = os.path.join(input_root, "cellpose_dataset")
+
+    def get_augmentations():
+        return ['rotate90', 'rotate180', 'rotate270', 'flip_h', 'flip_v']
+
+    def find_image_mask_pairs(dataset_path):
+        mask_dir = os.path.join(dataset_path, "masks")
+        pairs = []
+        for fname in os.listdir(dataset_path):
+            if fname.lower().endswith((".tif", ".tiff")):
+                img_path = os.path.join(dataset_path, fname)
+                msk_path = os.path.join(mask_dir, fname)
+                if os.path.isfile(msk_path):
+                    pairs.append((img_path, msk_path))
+        return pairs
+
+    def prepare_output_folders(base):
+        for subset in ["train", "test"]:
+            os.makedirs(os.path.join(base, subset, "images"), exist_ok=True)
+            os.makedirs(os.path.join(base, subset, "masks"), exist_ok=True)
+
+    print("Scanning datasets...")
+    datasets = []
+    for subdir in os.listdir(input_root):
+        dataset_path = os.path.join(input_root, subdir)
+        if os.path.isdir(dataset_path) and os.path.isdir(os.path.join(dataset_path, "masks")):
+            pairs = find_image_mask_pairs(dataset_path)
+            if pairs:
+                datasets.append(pairs)
+                print(f"  Found {len(pairs)} images in {dataset_path}")
+
+    if not datasets:
+        raise ValueError("No valid datasets with images and masks found.")
+
+    prepare_output_folders(output_root)
+
+    min_size = min(len(pairs) for pairs in datasets)
+    target_size = min_size if not augment_data else max(len(pairs) for pairs in datasets)
+
+    print("\nPreparing instruction list...")
+    instructions = []
+    global_index = 0
+
+    for pairs in datasets:
+        dataset_len = len(pairs)
+
+        # --- Step 1: Sample or augment ---
+        sampled_pairs = []
+        if dataset_len >= target_size:
+            sampled_pairs = random.sample(pairs, target_size)
+        else:
+            sampled_pairs = pairs.copy()
+            if augment_data:
+                needed = target_size - dataset_len
+                aug_methods = get_augmentations()
+                full_loops = needed // len(aug_methods)
+                extra = needed % len(aug_methods)
+
+                for _ in range(full_loops):
+                    for (img_path, msk_path), aug in zip(pairs, aug_methods * (dataset_len // len(aug_methods))):
+                        sampled_pairs.append((img_path, msk_path, aug))
+                if extra > 0:
+                    subset = random.sample(pairs * ((extra // len(aug_methods)) + 1), extra)
+                    for (img_path, msk_path), aug in zip(subset, aug_methods[:extra]):
+                        sampled_pairs.append((img_path, msk_path, aug))
+
+        # Add "no augmentation" tag to original files
+        augmented_sampled = [
+            (tup[0], tup[1], None) if len(tup) == 2 else tup
+            for tup in sampled_pairs
+        ]
+
+        # --- Step 2: Split into train/test ---
+        random.shuffle(augmented_sampled)
+        split_idx = int(train_fraction * len(augmented_sampled))
+        split_sets = {
+            "train": augmented_sampled[:split_idx],
+            "test": augmented_sampled[split_idx:]
+        }
+
+        for subset, items in split_sets.items():
+            for img_path, msk_path, aug in items:
+                dst_img = os.path.join(output_root, subset, "images", f"{global_index:05d}.tif")
+                dst_msk = os.path.join(output_root, subset, "masks", f"{global_index:05d}.tif")
+                instructions.append({
+                    "src_img": img_path,
+                    "src_msk": msk_path,
+                    "dst_img": dst_img,
+                    "dst_msk": dst_msk,
+                    "augment": aug
+                })
+                global_index += 1
+
+    print(f"Total files to process: {len(instructions)}")
+
+    # --- Step 3: Process with multiprocessing ---
+    print("Processing images with multiprocessing...")
+
+    if n_jobs is None:
+        n_jobs = max(1, cpu_count() - 1)
+    else:
+        n_jobs = int(n_jobs)
+
+    with Pool(n_jobs) as pool:
+        for i, _ in enumerate(pool.imap_unordered(process_instruction, instructions), 1):
+            print_progress(i, len(instructions), n_jobs=n_jobs, time_ls=time_ls, batch_size=None, operation_type="cellpose dataset")
+
+    print(f"Done. Dataset saved to: {output_root}")
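
Note: the new io helpers added in 0.5.0 can be driven directly from spacr.io; a usage sketch (folder paths are placeholders, and the keyword values simply echo the defaults shown above):

from spacr import io as spacr_io

# Rename vendor files (nd2/czi/lif/tif...) to Yokogawa-style names such as
# plate1_A01_T0001F001L01C01.tif and write rename_log.csv alongside them.
spacr_io.convert_to_yokogawa("/path/to/acquisition")

# Build a Cellpose train/test split from subfolders that each contain a 'masks' directory.
spacr_io.prepare_cellpose_dataset("/path/to/datasets", augment_data=True, train_fraction=0.8, n_jobs=4)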