spacr 0.4.12__py3-none-any.whl → 0.4.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/core.py +54 -8
- spacr/deep_spacr.py +2 -3
- spacr/gui_core.py +259 -75
- spacr/gui_elements.py +133 -2
- spacr/gui_utils.py +24 -20
- spacr/io.py +553 -61
- spacr/measure.py +11 -12
- spacr/ml.py +141 -258
- spacr/plot.py +76 -34
- spacr/sequencing.py +73 -38
- spacr/settings.py +160 -93
- spacr/submodules.py +620 -214
- spacr/timelapse.py +25 -25
- spacr/toxo.py +23 -23
- spacr/utils.py +249 -95
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/METADATA +2 -1
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/RECORD +21 -21
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/LICENSE +0 -0
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/WHEEL +0 -0
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/entry_points.txt +0 -0
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/top_level.txt +0 -0
spacr/io.py
CHANGED
@@ -23,6 +23,7 @@ import seaborn as sns
 from nd2reader import ND2Reader
 from torchvision import transforms
 from sklearn.model_selection import train_test_split
+import readlif

 def process_non_tif_non_2D_images(folder):
     """Processes all images in the folder and splits them into grayscale channels, preserving bit depth."""
@@ -131,58 +132,61 @@ def process_non_tif_non_2D_images(folder):

 def _load_images_and_labels(image_files, label_files, invert=False):

-    from .utils import invert_image
+    from .utils import invert_image

     images = []
     labels = []
-
-    if not image_files is None:
-        image_names = sorted([os.path.basename(f) for f in image_files])
-    else:
-        image_names = []
-
-    if not label_files is None:
-        label_names = sorted([os.path.basename(f) for f in label_files])
-    else:
-        label_names = []

-
+    image_names = sorted([os.path.basename(f) for f in image_files]) if image_files else []
+    label_names = sorted([os.path.basename(f) for f in label_files]) if label_files else []
+
+    if image_files and label_files:
         for img_file, lbl_file in zip(image_files, label_files):
             image = cellpose.io.imread(img_file)
+            if image is None:
+                print(f"WARNING: Could not load image: {img_file}")
+                continue
             if invert:
                 image = invert_image(image)
-            label = cellpose.io.imread(lbl_file)
             if image.max() > 1:
                 image = image / image.max()
+
+            label = cellpose.io.imread(lbl_file)
+            if label is None:
+                print(f"WARNING: Could not load label: {lbl_file}")
+                continue
+
             images.append(image)
             labels.append(label)
-
+
+    elif image_files:
         for img_file in image_files:
            image = cellpose.io.imread(img_file)
+           if image is None:
+               print(f"WARNING: Could not load image: {img_file}")
+               continue
            if invert:
                image = invert_image(image)
            if image.max() > 1:
                image = image / image.max()
            images.append(image)
-
-
-
+
+    elif label_files:
+        for lbl_file in label_files:
+            label = cellpose.io.imread(lbl_file)
+            if label is None:
+                print(f"WARNING: Could not load label: {lbl_file}")
+                continue
            labels.append(label)
-
-
-
-
-        image_dir = None
-
-        if not label_files is None:
-            label_dir = os.path.dirname(label_files[0])
-        else:
-            label_dir = None
-
-    # Log the number of loaded images and labels
+
+    image_dir = os.path.dirname(image_files[0]) if image_files else None
+    label_dir = os.path.dirname(label_files[0]) if label_files else None
+
     print(f'Loaded {len(images)} images and {len(labels)} labels from {image_dir} and {label_dir}')
-    if
-    print(f'image shape: {images[0].shape}, image type: images[0].
+    if images and labels:
+        print(f'image shape: {images[0].shape}, image type: {images[0].dtype}; '
+              f'label shape: {labels[0].shape}, label type: {labels[0].dtype}')
+
     return images, labels, image_names, label_names

 def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None,
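The rewritten loader above now tolerates unreadable files instead of failing mid-loop. A minimal sketch of exercising it directly (hypothetical paths; the helper is module-internal to spacr.io):

from glob import glob
from spacr.io import _load_images_and_labels

image_files = sorted(glob('/data/train/images/*.tif'))  # hypothetical layout
label_files = sorted(glob('/data/train/masks/*.tif'))
images, labels, image_names, label_names = _load_images_and_labels(image_files, label_files, invert=False)
# Files that cellpose.io.imread cannot read are skipped with a WARNING rather than raising.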
@@ -647,7 +651,7 @@ def load_images_from_paths(images_by_key):

     return images_dict

-#@log_function_call
+#@log_function_call
 def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=False, skip_mode='01', metadata_type='', img_format='.tif'):
     """
     Convert z-stack images to maximum intensity projection (MIP) images.
@@ -664,13 +668,16 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
         None
     """

+    if isinstance(img_format, str):
+        img_format = [img_format]
+
     from .utils import _extract_filename_metadata, print_progress

     regular_expression = re.compile(regex)
     stack_path = os.path.join(src, 'stack')
     files_processed = 0
     if not os.path.exists(stack_path) or (os.path.isdir(stack_path) and len(os.listdir(stack_path)) == 0):
-        all_filenames = [filename for filename in os.listdir(src) if filename.endswith(img_format)]
+        all_filenames = [filename for filename in os.listdir(src) if any(filename.endswith(ext) for ext in img_format)]
         print(f'All files: {len(all_filenames)} in {src}')
         time_ls = []
         image_paths_by_key = _extract_filename_metadata(all_filenames, src, regular_expression, metadata_type, pick_slice, skip_mode)
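With the isinstance() guard added above, img_format can be either a single extension or a list of extensions. Sketch calls with hypothetical src and regex:

_rename_and_organize_image_files(src, regex, img_format='.tif')                      # single extension, old behaviour
_rename_and_organize_image_files(src, regex, img_format=['.tif', '.tiff', '.nd2'])   # several extensions at once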
@@ -729,11 +736,11 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
             images_by_key.clear()

     # Move original images to a new directory
-    valid_exts = [img_format]
     newpath = os.path.join(src, 'orig')
     os.makedirs(newpath, exist_ok=True)
     for filename in os.listdir(src):
-
+        #print(f"{filename}: {os.path.splitext(filename)[1]}")
+        if os.path.splitext(filename)[1] in img_format:
             move = os.path.join(newpath, filename)
             if os.path.exists(move):
                 print(f'WARNING: A file with the same name already exists at location {move}')
@@ -891,11 +898,16 @@ def _merge_channels(src, plot=False):
     from .utils import print_progress

     stack_dir = os.path.join(src, 'stack')
-    allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
+    #allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
+
+    string_list = [str(i) for i in range(101)]+[f"{i:02d}" for i in range(10)]
+    allowed_names = sorted(string_list, key=lambda x: int(x))

     # List directories that match the allowed names
     chan_dirs = [d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d)) and d in allowed_names]
     chan_dirs.sort()
+
+    num_matching_folders = len(chan_dirs)

     print(f'List of folders in src: {chan_dirs}. Single channel folders.')

@@ -925,7 +937,7 @@ def _merge_channels(src, plot=False):
     if plot:
         plot_arrays(os.path.join(src, 'stack'))

-    return
+    return num_matching_folders

 def _mip_all(src, include_first_chan=True):

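_merge_channels() now reports how many single-channel folders it merged; preprocess_img_data() uses this return value further down to reconcile the channel settings. Sketch with a hypothetical src:

nr_channel_folders = _merge_channels(src, plot=False)
print(f"Merged {nr_channel_folders} channel folders into {os.path.join(src, 'stack')}")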
@@ -1566,7 +1578,7 @@ def preprocess_img_data(settings):
     Returns:
         None
     """
-
+
     src = settings['src']
     valid_ext = ['tif', 'tiff', 'png', 'jpeg']
     files = os.listdir(src)
@@ -1584,10 +1596,6 @@ def preprocess_img_data(settings):
     else:
         print(f'Could not find any {valid_ext} files in {src} only found {extension_counts[0]}')

-
-
-
-
     if os.path.exists(os.path.join(src,'stack')):
         print('Found existing stack folder.')
         if os.path.exists(os.path.join(src,'channel_stack')):
@@ -1598,11 +1606,11 @@ def preprocess_img_data(settings):

     mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
     backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]
-
+
     settings, metadata_type, custom_regex, nr, plot, batch_size, timelapse, lower_percentile, randomize, all_to_mip, pick_slice, skip_mode, cmap, figuresize, normalize, save_dtype, test_mode, test_images, random_test = set_default_settings_preprocess_img_data(settings)
-
+
     regex = _get_regex(metadata_type, img_format, custom_regex)
-
+
     if test_mode:

         print(f'Running spacr in test mode')
@@ -1611,6 +1619,8 @@ def preprocess_img_data(settings):
                 os.rmdir(os.path.join(src, 'test'))
                 print(f"Deleted test directory: {os.path.join(src, 'test')}")
             except OSError as e:
+                print(f"Error deleting test directory: {e}")
+                print(f"Delete manually before running test mode")
                 pass

         src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
@@ -1627,6 +1637,7 @@ def preprocess_img_data(settings):
         if timelapse:
             _move_to_chan_folder(src, regex, timelapse, metadata_type)
         else:
+            img_format = ['.tif', '.tiff', '.png', '.jpg', '.jpeg', '.bmp', '.nd2', '.czi', '.lif']
             _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)

         #Make sure no batches will be of only one image
@@ -1644,7 +1655,13 @@ def preprocess_img_data(settings):
             print(f"all images: {all_imgs}, full batch: {full_batches}, last batch: {last_batch_size}")
             raise ValueError("Last batch of size 1 detected. Adjust the batch size.")

-        _merge_channels(src, plot=False)
+        nr_channel_folders = _merge_channels(src, plot=False)
+
+        if len(settings['channels']) != nr_channel_folders:
+            print(f"Number of channels does not match number of channel folders. channels: {settings['channels']} channel folders: {nr_channel_folders}")
+            new_channels = list(range(nr_channel_folders))
+            print(f"Changing channels from {settings['channels']} to {new_channels}")
+            settings['channels'] = new_channels

     if timelapse:
         _create_movies_from_npy_per_channel(stack_path, fps=2)
@@ -1773,11 +1790,11 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
        print(e)
    conn.close()
    if 'png_list' in dataframes:
-        png_list_df = dataframes['png_list'][['cell_id', 'png_path', '
+        png_list_df = dataframes['png_list'][['cell_id', 'png_path', 'plateID', 'rowID', 'columnID', 'fieldID']].copy()
        png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
        png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
        if 'cell' in dataframes:
-            join_cols = ['object_label', '
+            join_cols = ['object_label', 'plateID', 'rowID', 'columnID','fieldID']
            dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
        else:
            print("Cell table not found in database tables.")
@@ -2078,14 +2095,18 @@ def _read_db(db_loc, tables):
     Returns:
         - dfs (list): A list of pandas DataFrames, each containing the data from a table.
     """
-    from .utils import rename_columns_in_db
+    from .utils import rename_columns_in_db, correct_metadata
+
     rename_columns_in_db(db_loc)
     conn = sqlite3.connect(db_loc)
     dfs = []
+
     for table in tables:
         query = f'SELECT * FROM {table}'
         df = pd.read_sql_query(query, conn)
+        df = correct_metadata(df)
         dfs.append(df)
+
     conn.close()
     return dfs

@@ -2264,7 +2285,7 @@ def _copy_missclassified(df):

 def _read_db(db_loc, tables):

-    from .utils import rename_columns_in_db
+    from .utils import rename_columns_in_db, correct_metadata

     rename_columns_in_db(db_loc)
     conn = sqlite3.connect(db_loc) # Create a connection to the database
@@ -2272,12 +2293,13 @@ def _read_db(db_loc, tables):
     for table in tables:
         query = f'SELECT * FROM {table}' # Write a SQL query to get the data from the database
         df = pd.read_sql_query(query, conn) # Use the read_sql_query function to get the data and save it as a DataFrame
+        df = correct_metadata(df)
         dfs.append(df)
     conn.close() # Close the connection
     return dfs

 def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10, change_plate=False):
-
+
     from .utils import _split_data

     # Initialize an empty dictionary to store DataFrames by table name
@@ -2287,8 +2309,8 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     for idx, loc in enumerate(locs):
         db_dfs = _read_db(loc, tables)
         if change_plate:
-            db_dfs['
-            db_dfs['prc'] = db_dfs['
+            db_dfs['plateID'] = f'plate{idx+1}'
+            db_dfs['prc'] = db_dfs['plateID'].astype(str) + '_' + db_dfs['rowID'].astype(str) + '_' + db_dfs['columnID'].astype(str)
         for table, df in zip(tables, db_dfs):
             data_dict[table].append(df)

@@ -2296,6 +2318,7 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     for table, dfs in data_dict.items():
         if dfs:
             data_dict[table] = pd.concat(dfs, axis=0)
+
         if verbose:
             print(f"{table}: {len(data_dict[table])}")

@@ -2382,18 +2405,18 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     if 'png_list' in data_dict:
         png_list = data_dict['png_list'].copy()
         png_list_g_df_numeric, png_list_g_df_non_numeric = _split_data(png_list, 'prcfo', 'cell_id')
-        png_list_g_df_non_numeric.drop(columns=['
+        png_list_g_df_non_numeric.drop(columns=['plateID','rowID','columnID','fieldID','file_name','cell_id', 'prcf'], inplace=True)
         if verbose:
             print(f'png_list: {len(png_list)}, png_list grouped: {len(png_list_g_df_numeric)}')
             print(f"Added png_list columns: {png_list_g_df_numeric.columns}, {png_list_g_df_non_numeric.columns}")
         merged_df = merged_df.merge(png_list_g_df_numeric, left_index=True, right_index=True)
         merged_df = merged_df.merge(png_list_g_df_non_numeric, left_index=True, right_index=True)
-
+
     # Add prc (plate row column) and prcfo (plate row column field object) columns
-    metadata = metadata.assign(prc=lambda x: x['
+    metadata = metadata.assign(prc=lambda x: x['plateID'] + '_' + x['rowID'] + '_' + x['columnID'])
     cells_well = metadata.groupby('prc')['object_label'].nunique().reset_index(name='cells_per_well')
     metadata = metadata.merge(cells_well, on='prc')
-    metadata = metadata.assign(prcfo=lambda x: x['
+    metadata = metadata.assign(prcfo=lambda x: x['plateID'] + '_' + x['rowID'] + '_' + x['columnID'] + '_' + x['fieldID'] + '_' + x['object_label'])
     metadata.set_index('prcfo', inplace=True)

     # Merge metadata with final merged DataFrame
@@ -2981,7 +3004,7 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann

     return class_paths

-def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='
+def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='columnID', class_metadata=['c1','c2']):
     all_paths = []

     # Connect to the database and retrieve the image paths and annotations
@@ -3003,9 +3026,9 @@ def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='t

     # Filter all_paths by metadata_type_by and class_metadata
     filtered_paths = []
-    metadata_index = {'
+    metadata_index = {'rowID': 2, 'columnID': 3}.get(metadata_type_by, None)
     if metadata_index is None:
-        raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be '
+        raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be 'rowID' or 'columnID'. {class_metadata} must be a list formatted as ['c1', 'c2'] or ['r1', 'r2']")

     for row in all_paths:
         if row[metadata_index] in class_metadata:
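The widened signature above keys the metadata filter on 'rowID' or 'columnID'. A sketch of a call that keeps only images from columns c1 and c2 (paths are hypothetical):

training_dataset_from_annotation_metadata('/data/measurements.db', '/data/training_set',
                                           annotation_column='test', annotated_classes=(1, 2),
                                           metadata_type_by='columnID', class_metadata=['c1', 'c2'])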
@@ -3095,4 +3118,473 @@ def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
         test_class_dir = os.path.join(dst, f'test/{cls}')
         print(f'Train class {cls}: {len(os.listdir(train_class_dir))}, Test class {cls}: {len(os.listdir(test_class_dir))}')

-    return os.path.join(dst, 'train'), os.path.join(dst, 'test')
+    return os.path.join(dst, 'train'), os.path.join(dst, 'test')
+
+def convert_separate_files_to_yokogawa(folder, regex):
+
+    ROWS = "ABCDEFGHIJKLMNOP"
+    COLS = [f"{i:02d}" for i in range(1, 25)]
+    WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+    def _get_next_well(used_wells):
+        plate = 1
+        for well in WELLS:
+            well_name = f"plate{plate}_{well}"
+            if well_name not in used_wells:
+                return well_name
+            if well == "P24":
+                plate += 1
+        return f"plate{plate}_A01"
+
+    pattern = re.compile(regex, re.I)
+
+    files_by_region = {}
+    rename_log = []
+    csv_path = os.path.join(folder, "rename_log.csv")
+    used_wells = set()
+    region_to_well = {}
+
+    # Group files by (plateID, wellID, fieldID, timeID, chanID)
+    for file in os.listdir(folder):
+        match = pattern.match(file)
+        if not match:
+            print(f"Skipping {file}: does not match regex.")
+            continue
+
+        meta = match.groupdict()
+
+        # Mandatory metadata
+        if 'wellID' not in meta or meta['wellID'] is None:
+            print(f"Skipping {file}: missing mandatory wellID.")
+            continue
+        wellID = meta['wellID']
+
+        # Optional metadata with defaults
+        plateID = meta.get('plateID', '1') or '1'
+        fieldID = meta.get('fieldID', '1') or '1'
+        timeID = int(meta.get('timeID', 1) or 1)
+        chanID = int(meta.get('chanID', 1) or 1)
+        sliceID = meta.get('sliceID')
+        sliceID = int(sliceID) if sliceID is not None else None
+
+        region_key = (plateID, wellID, fieldID, timeID, chanID)
+
+        files_by_region.setdefault(region_key, []).append((file, sliceID))
+
+    # Assign wells and process files per region
+    for region, file_list in files_by_region.items():
+        if region[:3] not in region_to_well:
+            next_well = _get_next_well(used_wells)
+            region_to_well[region[:3]] = next_well
+            used_wells.add(next_well)
+
+        assigned_well = region_to_well[region[:3]]
+        plateID, wellID, fieldID, timeID, chanID = region
+
+        # Check if multiple slices exist and are meaningful
+        slice_ids = [sid for _, sid in file_list if sid is not None]
+        unique_slices = set(slice_ids)
+
+        images = []
+        for filename, _ in sorted(file_list, key=lambda x: x[1] or 1):
+            img = tifffile.imread(os.path.join(folder, filename))
+            images.append(img)
+
+        # Perform MIP only if multiple unique slices are present
+        if len(unique_slices) > 1:
+            img_to_save = np.max(np.stack(images), axis=0)
+        else:
+            img_to_save = images[0]
+
+        dtype = img_to_save.dtype
+
+        new_filename = f"{assigned_well}_T{timeID:04d}F{int(fieldID):03d}L01C{chanID:02d}.tif"
+        new_filepath = os.path.join(folder, new_filename)
+        tifffile.imwrite(new_filepath, img_to_save.astype(dtype))
+
+        # Log original filenames involved in MIP or single file rename
+        original_files = ";".join(f[0] for f in file_list)
+        rename_log.append({"Original File(s)": original_files, "Renamed TIFF": new_filename})
+
+    pd.DataFrame(rename_log).to_csv(csv_path, index=False)
+    print(f"Processing complete. Files saved in {folder} and rename log saved as {csv_path}.")
+
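convert_separate_files_to_yokogawa() expects a regex with named groups; only wellID is mandatory, while plateID, fieldID, timeID, chanID and sliceID fall back to defaults. A sketch assuming a hypothetical filename scheme such as exp1_A02_f003_t01_z05_c2.tif:

regex = r'(?P<plateID>.+)_(?P<wellID>[A-P]\d{2})_f(?P<fieldID>\d+)_t(?P<timeID>\d+)_z(?P<sliceID>\d+)_c(?P<chanID>\d+)\.tif'
convert_separate_files_to_yokogawa('/data/raw_exports', regex)
# Slices sharing (plateID, wellID, fieldID, timeID, chanID) are MIP-projected and written as
# plate<N>_<well>_T####F###L01C##.tif; rename_log.csv records the mapping.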
+def convert_to_yokogawa(folder):
+    """
+    Detects file type in the folder and converts them
+    to Yokogawa-style naming with Maximum Intensity Projection (MIP).
+    """
+
+    #def _get_next_well(used_wells):
+    #    """
+    #    Determines the next available well position in a 384-well format.
+    #    Iterates wells, and after P24, switches to plate2.
+    #    """
+    #    plate = 1
+    #    for well in WELLS:
+    #        well_name = f"plate{plate}_{well}"
+    #        if well_name not in used_wells:
+    #            used_wells.add(well_name)
+    #            return well_name
+    #        if well == "P24":
+    #            plate += 1
+    #    raise ValueError("All wells exhausted.")
+
+    def _get_next_well(used_wells):
+        """
+        Determines the next available well position across multiple 384-well plates.
+        """
+        ROWS = "ABCDEFGHIJKLMNOP"
+        COLS = [f"{i:02d}" for i in range(1, 25)]
+        WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+        plate = 1
+        while True:
+            for well in WELLS:
+                well_name = f"plate{plate}_{well}"
+                if well_name not in used_wells:
+                    used_wells.add(well_name)
+                    return well_name
+            plate += 1  # All wells exhausted in current plate, increment to next plate
+
+
+    # Define 384-well plate format
+    ROWS = "ABCDEFGHIJKLMNOP"
+    COLS = [f"{i:02d}" for i in range(1, 25)]
+    WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+    filenames = []
+    rename_log = []
+    csv_path = os.path.join(folder, "rename_log.csv")
+    used_wells = set()
+
+    # **Dictionary to store well assignments per original file**
+    file_to_well = {}
+
+    for file in os.listdir(folder):
+        path = os.path.join(folder, file)
+        ext = file.lower().split('.')[-1]
+
+        # **Assign a well only once per original file**
+        if file not in file_to_well:
+            file_to_well[file] = _get_next_well(used_wells)
+            #used_wells.add(file_to_well[file])  # Mark it as used
+
+        well = file_to_well[file]  # Use the same well for all channels/times
+
+        ### **Process Nikon ND2 Files**
+        if ext == 'nd2':
+            try:
+                nd2 = ND2Reader(path)
+                metadata = nd2.metadata
+
+                timepoints = list(range(len(metadata.get("frames", [0])))) or [0]
+                fields = list(range(len(metadata.get("fields_of_view", [0])))) or [0]
+                z_levels = list(metadata.get("z_levels", range(1))) if metadata.get("z_levels") else [0]
+                channels = metadata.get("channels", [])
+
+                for t_idx in timepoints:
+                    for f_idx in fields:
+                        for c_idx, channel in enumerate(channels):
+                            try:
+                                mip_image = np.max.reduce([
+                                    nd2.get_frame_2D(t=t_idx, v=f_idx, z=z_idx, c=c_idx)
+                                    for z_idx in z_levels
+                                ], axis=0)
+
+                                dtype = mip_image.dtype
+                                filename = f"{well}_T{t_idx+1:04d}F{f_idx+1:03d}L01C{c_idx+1:02d}.tif"
+                                filepath = os.path.join(folder, filename)
+
+                                tifffile.imwrite(filepath, mip_image.astype(dtype))
+                                rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+                            except IndexError:
+                                print(f"Warning: ND2 file {file} has an incomplete data structure. Skipping.")
+
+            except Exception as e:
+                print(f"Error processing ND2 file {file}: {e}")
+
+        ### **Process Zeiss CZI Files**
+        elif ext == 'czi':
+            with czifile.CziFile(path) as czi:
+                img_data = czi.asarray()  # Read the full image array
+
+                # Remove singleton dimensions (if any)
+                img_data = np.squeeze(img_data)
+
+                # Get the actual shape of the data
+                shape = img_data.shape
+                num_dims = len(shape)
+
+                # Default values if dimensions are missing
+                timepoints = 1
+                z_levels = 1
+                channels = 1
+
+                # Determine dimension mapping dynamically
+                if num_dims == 2:  # (Y, X) → Single 2D image
+                    y_dim, x_dim = shape
+                    img_data = img_data.reshape(1, 1, 1, y_dim, x_dim)  # Add missing dimensions
+                elif num_dims == 3:  # (C, Y, X) or (Z, Y, X)
+                    if shape[0] <= 4:  # Likely (C, Y, X)
+                        channels, y_dim, x_dim = shape
+                        img_data = img_data.reshape(1, 1, channels, y_dim, x_dim)  # Add missing dimensions
+                    else:  # Likely (Z, Y, X)
+                        z_levels, y_dim, x_dim = shape
+                        img_data = img_data.reshape(1, z_levels, 1, y_dim, x_dim)  # Add missing dimensions
+                elif num_dims == 4:  # Could be (T, C, Y, X) or (T, Z, Y, X) or (Z, C, Y, X)
+                    if shape[1] <= 4:  # Assume (T, C, Y, X)
+                        timepoints, channels, y_dim, x_dim = shape
+                        img_data = img_data.reshape(timepoints, 1, channels, y_dim, x_dim)  # Add missing Z
+                    else:  # Assume (T, Z, Y, X) or (Z, C, Y, X)
+                        timepoints, z_levels, y_dim, x_dim = shape
+                        img_data = img_data.reshape(timepoints, z_levels, 1, y_dim, x_dim)  # Add missing C
+                elif num_dims == 5:  # Standard (T, Z, C, Y, X)
+                    timepoints, z_levels, channels, y_dim, x_dim = shape
+                else:
+                    raise ValueError(f"Unexpected CZI shape: {shape}. Unable to process.")
+
+                # Iterate over detected timepoints, channels, and perform MIP over Z
+                for t_idx in range(timepoints):
+                    for c_idx in range(channels):
+                        # Extract Z-stack or single image
+                        if z_levels > 1:
+                            z_stack = img_data[t_idx, :, c_idx, :, :]  # MIP over Z
+                            mip_image = np.max(z_stack, axis=0)
+                        else:
+                            mip_image = img_data[t_idx, 0, c_idx, :, :]  # No Z, take directly
+
+                        # Ensure correct dtype
+                        dtype = mip_image.dtype
+
+                        # Generate Yokogawa-style filename
+                        filename = f"{well}_T{t_idx+1:04d}F001L01C{c_idx+1:02d}.tif"
+                        filepath = os.path.join(folder, filename)
+
+                        # Save the extracted image
+                        tifffile.imwrite(filepath, mip_image.astype(dtype))
+
+                        rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+        ### **Process Leica LIF Files**
+        elif ext == 'lif':
+            try:
+                lif_file = readlif.Reader(path)
+
+                for image_idx, image in enumerate(lif_file.getIterImage()):
+                    timepoints = range(getattr(image.dims, 't', 1))
+                    z_levels = range(getattr(image.dims, 'z', 1))
+                    channels = range(getattr(image.dims, 'c', 1))
+
+                    for t_idx in timepoints:
+                        for c_idx in channels:
+                            z_stack = []
+                            for z_idx in z_levels:
+                                try:
+                                    frame = image.getFrame(z=z_idx, t=t_idx, c=c_idx)
+                                    z_stack.append(frame)
+                                except IndexError:
+                                    print(f"Missing frame: T{t_idx}, Z{z_idx}, C{c_idx} in {file}, skipping frame.")
+
+                            if z_stack:
+                                mip_image = np.max(np.stack(z_stack), axis=0)
+                                dtype = mip_image.dtype
+                                filename = f"{well}_T{t_idx+1:04d}F{image_idx+1:03d}L01C{c_idx+1:02d}.tif"
+                                filepath = os.path.join(folder, filename)
+
+                                tifffile.imwrite(filepath, mip_image.astype(dtype))
+                                rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+            except Exception as e:
+                print(f"Error processing LIF file {file}: {e}")
+
+        ### **Process Standard Image Files (TIFF, PNG, JPEG, BMP)**
+        elif ext in ['tif', 'tiff', 'png', 'jpg', 'jpeg', 'bmp'] and not file.startswith("plate"):
+            try:
+                with tifffile.TiffFile(path) as tif:
+                    images = tif.asarray()
+                    ndim = images.ndim
+
+                    # Defaults
+                    t_dim = z_dim = c_dim = 1
+
+                    # Determine dimensions more explicitly
+                    if ndim == 2:
+                        mip_image = images
+                        filename = f"{well}_T0001F001L01C01.tif"
+                        tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                        rename_log.append({"Original File": file, "Renamed TIFF": filename})
+                        continue
+
+                    elif ndim == 3:
+                        if images.shape[0] <= 4:  # Likely channels
+                            c_dim = images.shape[0]
+                            for c in range(c_dim):
+                                mip_image = images[c, :, :]
+                                filename = f"{well}_T0001F001L01C{c+1:02d}.tif"
+                                tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                                rename_log.append({"Original File": file, "Renamed TIFF": filename})
+                        else:  # Z-stack
+                            mip_image = np.max(images, axis=0)
+                            filename = f"{well}_T0001F001L01C01.tif"
+                            tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                            rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+                    elif ndim == 4:
+                        t_dim, z_dim, y_dim, x_dim = images.shape
+                        for t in range(t_dim):
+                            mip_image = np.max(images[t, :, :, :], axis=0)
+                            filename = f"{well}_T{t+1:04d}F001L01C01.tif"
+                            tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                            rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+                    else:
+                        raise ValueError(f"Unsupported TIFF dimensions: {images.shape}")
+
+            except Exception as e:
+                print(f"Error processing standard image file {file}: {e}")
+
+
+    # Save rename log as CSV
+    pd.DataFrame(rename_log).to_csv(csv_path, index=False)
+    print(f"Processing complete. Files saved in {folder} and rename log saved as {csv_path}.")
+
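convert_to_yokogawa() needs no regex: it dispatches on the file extension, assigns each original file its own well, and writes MIP TIFFs next to the sources. Sketch with a hypothetical folder:

from spacr.io import convert_to_yokogawa
convert_to_yokogawa('/data/mixed_microscope_exports')
# ND2, CZI, LIF and standard raster inputs come out as plate<N>_<well>_T####F###L01C##.tif,
# and rename_log.csv maps each output back to its original file.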
+def apply_augmentation(image, method):
+    if method == 'rotate90':
+        return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
+    elif method == 'rotate180':
+        return cv2.rotate(image, cv2.ROTATE_180)
+    elif method == 'rotate270':
+        return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
+    elif method == 'flip_h':
+        return cv2.flip(image, 1)
+    elif method == 'flip_v':
+        return cv2.flip(image, 0)
+    return image
+
+def process_instruction(entry):
+    img = tifffile.imread(entry["src_img"])
+    msk = tifffile.imread(entry["src_msk"])
+    if entry["augment"]:
+        img = apply_augmentation(img, entry["augment"])
+        msk = apply_augmentation(msk, entry["augment"])
+    tifffile.imwrite(entry["dst_img"], img)
+    tifffile.imwrite(entry["dst_msk"], msk)
+    return 1
+
+def prepare_cellpose_dataset(input_root, augment_data=False, train_fraction=0.8, n_jobs=None):
+
+    from .utils import print_progress
+
+    time_ls = []
+    input_root = os.path.abspath(input_root)
+    output_root = os.path.join(input_root, "cellpose_dataset")
+
+    def get_augmentations():
+        return ['rotate90', 'rotate180', 'rotate270', 'flip_h', 'flip_v']
+
+    def find_image_mask_pairs(dataset_path):
+        mask_dir = os.path.join(dataset_path, "masks")
+        pairs = []
+        for fname in os.listdir(dataset_path):
+            if fname.lower().endswith((".tif", ".tiff")):
+                img_path = os.path.join(dataset_path, fname)
+                msk_path = os.path.join(mask_dir, fname)
+                if os.path.isfile(msk_path):
+                    pairs.append((img_path, msk_path))
+        return pairs
+
+    def prepare_output_folders(base):
+        for subset in ["train", "test"]:
+            os.makedirs(os.path.join(base, subset, "images"), exist_ok=True)
+            os.makedirs(os.path.join(base, subset, "masks"), exist_ok=True)
+
+    print("Scanning datasets...")
+    datasets = []
+    for subdir in os.listdir(input_root):
+        dataset_path = os.path.join(input_root, subdir)
+        if os.path.isdir(dataset_path) and os.path.isdir(os.path.join(dataset_path, "masks")):
+            pairs = find_image_mask_pairs(dataset_path)
+            if pairs:
+                datasets.append(pairs)
+                print(f" Found {len(pairs)} images in {dataset_path}")
+
+    if not datasets:
+        raise ValueError("No valid datasets with images and masks found.")
+
+    prepare_output_folders(output_root)
+
+    min_size = min(len(pairs) for pairs in datasets)
+    target_size = min_size if not augment_data else max(len(pairs) for pairs in datasets)
+
+    print("\nPreparing instruction list...")
+    instructions = []
+    global_index = 0
+
+    for pairs in datasets:
+        dataset_len = len(pairs)
+
+        # --- Step 1: Sample or augment ---
+        sampled_pairs = []
+        if dataset_len >= target_size:
+            sampled_pairs = random.sample(pairs, target_size)
+        else:
+            sampled_pairs = pairs.copy()
+            if augment_data:
+                needed = target_size - dataset_len
+                aug_methods = get_augmentations()
+                full_loops = needed // len(aug_methods)
+                extra = needed % len(aug_methods)
+
+                for _ in range(full_loops):
+                    for (img_path, msk_path), aug in zip(pairs, aug_methods * (dataset_len // len(aug_methods))):
+                        sampled_pairs.append((img_path, msk_path, aug))
+                if extra > 0:
+                    subset = random.sample(pairs * ((extra // len(aug_methods)) + 1), extra)
+                    for (img_path, msk_path), aug in zip(subset, aug_methods[:extra]):
+                        sampled_pairs.append((img_path, msk_path, aug))
+
+        # Add "no augmentation" tag to original files
+        augmented_sampled = [
+            (tup[0], tup[1], None) if len(tup) == 2 else tup
+            for tup in sampled_pairs
+        ]
+
+        # --- Step 2: Split into train/test ---
+        random.shuffle(augmented_sampled)
+        split_idx = int(train_fraction * len(augmented_sampled))
+        split_sets = {
+            "train": augmented_sampled[:split_idx],
+            "test": augmented_sampled[split_idx:]
+        }
+
+        for subset, items in split_sets.items():
+            for img_path, msk_path, aug in items:
+                dst_img = os.path.join(output_root, subset, "images", f"{global_index:05d}.tif")
+                dst_msk = os.path.join(output_root, subset, "masks", f"{global_index:05d}.tif")
+                instructions.append({
+                    "src_img": img_path,
+                    "src_msk": msk_path,
+                    "dst_img": dst_img,
+                    "dst_msk": dst_msk,
+                    "augment": aug
+                })
+                global_index += 1
+
+    print(f"Total files to process: {len(instructions)}")
+
+    # --- Step 3: Process with multiprocessing ---
+    print("Processing images with multiprocessing...")
+
+    if n_jobs is None:
+        n_jobs = max(1, cpu_count() - 1)
+    else:
+        n_jobs = int(n_jobs)
+
+    with Pool(n_jobs) as pool:
+        for i, _ in enumerate(pool.imap_unordered(process_instruction, instructions), 1):
+            print_progress(i, len(instructions), n_jobs=n_jobs, time_ls=time_ls, batch_size=None, operation_type="cellpose dataset")
+
+    print(f"Done. Dataset saved to: {output_root}")
+
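prepare_cellpose_dataset() pools every subfolder of input_root that holds TIFF images plus a masks/ subfolder, optionally balances the datasets through augmentation, and writes a train/test split under input_root/cellpose_dataset. Sketch with hypothetical paths:

from spacr.io import prepare_cellpose_dataset
prepare_cellpose_dataset('/data/cellpose_sources', augment_data=True, train_fraction=0.8, n_jobs=4)
# Output: /data/cellpose_sources/cellpose_dataset/{train,test}/{images,masks}, files numbered 00000.tif, 00001.tif, ...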