spacr 0.0.18__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/io.py CHANGED
@@ -1,6 +1,7 @@
- import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose
+ import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob
  import numpy as np
  import pandas as pd
+ import tifffile
  from PIL import Image
  from collections import defaultdict, Counter
  from pathlib import Path
@@ -22,9 +23,9 @@ import seaborn as sns
  import matplotlib.pyplot as plt
  from torchvision.transforms import ToTensor

+
  from .logger import log_function_call

- @log_function_call
  def _load_images_and_labels(image_files, label_files, circular=False, invert=False, image_extension="*.tif", label_extension="*.tif"):

  from .utils import invert_image, apply_mask
@@ -44,19 +45,19 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal

  if not image_files is None and not label_files is None:
  for img_file, lbl_file in zip(image_files, label_files):
- image = cellpose.imread(img_file)
+ image = cellpose.io.imread(img_file)
  if invert:
  image = invert_image(image)
  if circular:
  image = apply_mask(image, output_value=0)
- label = cellpose.imread(lbl_file)
+ label = cellpose.io.imread(lbl_file)
  if image.max() > 1:
  image = image / image.max()
  images.append(image)
  labels.append(label)
  elif not image_files is None:
  for img_file in image_files:
- image = cellpose.imread(img_file)
+ image = cellpose.io.imread(img_file)
  if invert:
  image = invert_image(image)
  if circular:
@@ -66,7 +67,7 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
  images.append(image)
  elif not image_files is None:
  for lbl_file in label_files:
- label = cellpose.imread(lbl_file)
+ label = cellpose.io.imread(lbl_file)
  if circular:
  label = apply_mask(label, output_value=0)
  labels.append(label)
@@ -87,7 +88,6 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
  print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {labels[0].shape}, image type: labels[0].shape')
  return images, labels, image_names, label_names

- @log_function_call
  def _load_normalized_images_and_labels(image_files, label_files, signal_thresholds=[1000], channels=None, percentiles=None, circular=False, invert=False, visualize=False):

  from .plot import normalize_and_visualize
@@ -109,15 +109,17 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol

  if label_files is not None:
  label_names = [os.path.basename(f) for f in label_files]
+ label_dir = os.path.dirname(label_files[0])

  # Load images and check percentiles
  for i,img_file in enumerate(image_files):
- image = cellpose.imread(img_file)
+ #print(img_file)
+ image = cellpose.io.imread(img_file)
  if invert:
  image = invert_image(image)
  if circular:
  image = apply_mask(image, output_value=0)
-
+ #print(image.shape)
  # If specific channels are specified, select them
  if channels is not None and image.ndim == 3:
  image = image[..., channels]
@@ -169,7 +171,7 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol

  if label_files is not None:
  for lbl_file in label_files:
- labels.append(cellpose.imread(lbl_file))
+ labels.append(cellpose.io.imread(lbl_file))
  else:
  label_names = []
  label_dir = None
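The recurring one-line change in the hunks above tracks Cellpose's reorganized namespace: imread now lives in the cellpose.io submodule rather than on the top-level package. A minimal sketch of the updated call; 'example.tif' is a hypothetical path, not a file from this package:

from cellpose import io

# io.imread returns the image as a numpy array
image = io.imread('example.tif')
print(image.shape, image.dtype)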
@@ -178,85 +180,6 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol

  return normalized_images, labels, image_names, label_names

- class MyDataset(Dataset):
- """
- Custom dataset class for loading and processing image data.
-
- Args:
- data_dir (str): The directory path where the data is stored.
- loader_classes (list): List of class names.
- transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
- shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
- load_to_memory (bool, optional): Whether to load images into memory. Default is False.
-
- Attributes:
- data_dir (str): The directory path where the data is stored.
- classes (list): List of class names.
- transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
- shuffle (bool): Whether to shuffle the dataset.
- load_to_memory (bool): Whether to load images into memory.
- filenames (list): List of file paths.
- labels (list): List of labels corresponding to each file.
- images (list): List of loaded images.
- image_cache (Cache): Cache object for storing loaded images.
-
- Methods:
- load_image: Load an image from file.
- __len__: Get the length of the dataset.
- shuffle_dataset: Shuffle the dataset.
- __getitem__: Get an item from the dataset.
-
- """
-
- def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
- from .utils import Cache
- self.data_dir = data_dir
- self.classes = loader_classes
- self.transform = transform
- self.shuffle = shuffle
- self.load_to_memory = load_to_memory
- self.filenames = []
- self.labels = []
- self.images = []
- self.image_cache = Cache(50)
- for class_name in self.classes:
- class_path = os.path.join(data_dir, class_name)
- class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
- self.filenames.extend(class_files)
- self.labels.extend([self.classes.index(class_name)] * len(class_files))
- if self.shuffle:
- self.shuffle_dataset()
- if self.load_to_memory:
- self.images = [self.load_image(f) for f in self.filenames]
-
- def load_image(self, img_path):
- img = self.image_cache.get(img_path)
- if img is None:
- img = Image.open(img_path).convert('RGB')
- self.image_cache.put(img_path, img)
- return img
-
- def _len__(self):
- return len(self.filenames)
-
- def shuffle_dataset(self):
- combined = list(zip(self.filenames, self.labels))
- random.shuffle(combined)
- self.filenames, self.labels = zip(*combined)
-
- def _getitem__(self, index):
- label = self.labels[index]
- filename = self.filenames[index]
- if self.load_to_memory:
- img = self.images[index]
- else:
- img = self.load_image(filename)
- if self.transform is not None:
- img = self.transform(img)
- else:
- img = ToTensor()(img)
- return img, label, filename
-
  class CombineLoaders:
  """
  A class that combines multiple data loaders into a single iterator.
@@ -383,6 +306,85 @@ class NoClassDataset(Dataset):
  img = ToTensor()(img)
  # Return both the image and its filename
  return img, self.filenames[index]
+
+ class MyDataset_v1(Dataset):
+ """
+ Custom dataset class for loading and processing image data.
+
+ Args:
+ data_dir (str): The directory path where the data is stored.
+ loader_classes (list): List of class names.
+ transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
+ shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
+ load_to_memory (bool, optional): Whether to load images into memory. Default is False.
+
+ Attributes:
+ data_dir (str): The directory path where the data is stored.
+ classes (list): List of class names.
+ transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
+ shuffle (bool): Whether to shuffle the dataset.
+ load_to_memory (bool): Whether to load images into memory.
+ filenames (list): List of file paths.
+ labels (list): List of labels corresponding to each file.
+ images (list): List of loaded images.
+ image_cache (Cache): Cache object for storing loaded images.
+
+ Methods:
+ load_image: Load an image from file.
+ __len__: Get the length of the dataset.
+ shuffle_dataset: Shuffle the dataset.
+ __getitem__: Get an item from the dataset.
+
+ """
+
+ def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
+ from .utils import Cache
+ self.data_dir = data_dir
+ self.classes = loader_classes
+ self.transform = transform
+ self.shuffle = shuffle
+ self.load_to_memory = load_to_memory
+ self.filenames = []
+ self.labels = []
+ self.images = []
+ self.image_cache = Cache(50)
+ for class_name in self.classes:
+ class_path = os.path.join(data_dir, class_name)
+ class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
+ self.filenames.extend(class_files)
+ self.labels.extend([self.classes.index(class_name)] * len(class_files))
+ if self.shuffle:
+ self.shuffle_dataset()
+ if self.load_to_memory:
+ self.images = [self.load_image(f) for f in self.filenames]
+
+ def load_image(self, img_path):
+ img = self.image_cache.get(img_path)
+ if img is None:
+ img = Image.open(img_path).convert('RGB')
+ self.image_cache.put(img_path, img)
+ return img
+
+ def _len__(self):
+ return len(self.filenames)
+
+ def shuffle_dataset(self):
+ combined = list(zip(self.filenames, self.labels))
+ random.shuffle(combined)
+ self.filenames, self.labels = zip(*combined)
+
+ def _getitem__(self, index):
+ label = self.labels[index]
+ filename = self.filenames[index]
+ if self.load_to_memory:
+ img = self.images[index]
+ else:
+ img = self.load_image(filename)
+ if self.transform is not None:
+ img = self.transform(img)
+ else:
+ img = ToTensor()(img)
+ return img, label, filename

  class MyDataset(Dataset):
  """
@@ -398,7 +400,7 @@ class MyDataset(Dataset):
  specific_labels (list, optional): A list of specific labels corresponding to the specific files. Default is None.
  """

- def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
+ def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
  self.data_dir = data_dir
  self.classes = loader_classes
  self.transform = transform
@@ -427,7 +429,7 @@ class MyDataset(Dataset):
  img = Image.open(img_path).convert('RGB')
  return img

- def _len__(self):
+ def __len__(self):
  return len(self.filenames)

  def shuffle_dataset(self):
@@ -439,7 +441,7 @@ class MyDataset(Dataset):
  filename = os.path.basename(filepath) # Get just the filename from the full path
  return filename.split('_')[0]

- def _getitem__(self, index):
+ def __getitem__(self, index):
  label = self.labels[index]
  filename = self.filenames[index]
  img = self.load_image(filename)
@@ -527,6 +529,7 @@ class TarImageDataset(Dataset):

  return img, m.name

+ @log_function_call
  def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=False, skip_mode='01', metadata_type='', img_format='.tif'):
  """
  Convert z-stack images to maximum intensity projection (MIP) images.
@@ -599,7 +602,7 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
  shutil.move(os.path.join(src, filename), move)
  return

- def _merge_file(chan_dirs, stack_dir, file):
+ def _merge_file_v1(chan_dirs, stack_dir, file):
  """
  Merge multiple channels into a single stack and save it as a numpy array.

@@ -624,15 +627,80 @@ def _merge_file(chan_dirs, stack_dir, file):
  stack = np.concatenate(channels, axis=2)
  np.save(new_file, stack)

- def _is_dir_empty(dir_path):
+ def _merge_file_v1(chan_dirs, stack_dir, file):
  """
- Check if a directory is empty.
+ Merge multiple channels into a single stack and save it as a numpy array.
+ Args:
+ chan_dirs (list): List of directories containing channel images.
+ stack_dir (str): Directory to save the merged stack.
+ file (str): File name of the channel image.

+ Returns:
+ None
+ """
+ new_file = stack_dir / (file.stem + '.npy')
+ if not new_file.exists():
+ stack_dir.mkdir(exist_ok=True)
+ channels = []
+ for i, chan_dir in enumerate(chan_dirs):
+ img_path = str(chan_dir / file.name)
+ img = cv2.imread(img_path, -1)
+ if img is None:
+ print(f"Warning: Failed to read image {img_path}")
+ continue
+ chan = np.expand_dims(img, axis=2)
+ channels.append(chan)
+ del img # Explicitly delete the reference to the image to free up memory
+ if i % 10 == 0: # Periodically suggest garbage collection
+ gc.collect()
+
+ if channels:
+ stack = np.concatenate(channels, axis=2)
+ np.save(new_file, stack)
+ else:
+ print(f"No valid channels to merge for file {file.name}")
+
+ def _merge_file(chan_dirs, stack_dir, file_name):
+ """
+ Merge multiple channels into a single stack and save it as a numpy array, using os module for path handling.
+
  Args:
- dir_path (str): The path to the directory.
+ chan_dirs (list): List of directories containing channel images.
+ stack_dir (str): Directory to save the merged stack.
+ file_name (str): File name of the channel image.

  Returns:
- bool: True if the directory is empty, False otherwise.
+ None
+ """
+ # Construct new file path
+ file_root, file_ext = os.path.splitext(file_name)
+ new_file = os.path.join(stack_dir, file_root + '.npy')
+
+ # Check if the new file exists and create the stack directory if it doesn't
+ if not os.path.exists(new_file):
+ os.makedirs(stack_dir, exist_ok=True)
+ channels = []
+ for i, chan_dir in enumerate(chan_dirs):
+ img_path = os.path.join(chan_dir, file_name)
+ img = cv2.imread(img_path, -1)
+ if img is None:
+ print(f"Warning: Failed to read image {img_path}")
+ continue
+ chan = np.expand_dims(img, axis=2)
+ channels.append(chan)
+ del img # Explicitly delete the reference to the image to free up memory
+ if i % 10 == 0: # Periodically suggest garbage collection
+ gc.collect()
+
+ if channels:
+ stack = np.concatenate(channels, axis=2)
+ np.save(new_file, stack)
+ else:
+ print(f"No valid channels to merge for file {file_name}")
+
+ def _is_dir_empty(dir_path):
+ """
+ Check if a directory is empty using os module.
  """
  return len(os.listdir(dir_path)) == 0

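The rewritten _merge_file trades pathlib.Path objects for plain strings, so callers pass directory paths and a bare file name. A hedged usage sketch; the folder layout and file names below are hypothetical:

import os
from spacr.io import _merge_file  # private helper, name as introduced in this diff

src = '/data/plate1'  # hypothetical source folder with one subfolder per channel
chan_dirs = [os.path.join(src, d) for d in ['00', '01', '02']]
stack_dir = os.path.join(src, 'stack')
# Reads plate1_A01_1.tif from each channel folder, stacks the channels on
# axis 2, and writes stack/plate1_A01_1.npy
_merge_file(chan_dirs, stack_dir, 'plate1_A01_1.tif')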
@@ -706,7 +774,7 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
  if metadata_type =='cq1':
  orig_wellID = wellID
  wellID = _convert_cq1_well_id(wellID)
- print(f'Converted Well ID: {orig_wellID} to {wellID}')
+ print(f'Converted Well ID: {orig_wellID} to {wellID}')#, end='\r', flush=True)

  newname = f"{plateID}_{wellID}_{fieldID}_{timeID if timelapse else ''}{ext}"
  newpath = src / chanID
@@ -732,7 +800,7 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
  shutil.move(os.path.join(src, filename), move)
  return

- def _merge_channels(src, plot=False):
+ def _merge_channels_v2(src, plot=False):
  from .plot import plot_arrays
  """
  Merge the channels in the given source directory and save the merged files in a 'stack' directory.
@@ -757,9 +825,11 @@

  # Create the 'stack' directory if it doesn't exist
  stack_dir.mkdir(exist_ok=True)
+ print(f'generated folder with merged arrays: {stack_dir}')

  if _is_dir_empty(stack_dir):
- with Pool(cpu_count()) as pool:
+ with Pool(max(cpu_count() // 2, 1)) as pool:
+ #with Pool(cpu_count()) as pool:
  merge_func = partial(_merge_file, chan_dirs, stack_dir)
  pool.map(merge_func, dir_files)

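Halving the worker count here is presumably a memory-pressure guard: each worker holds full-resolution channel arrays, so capping the pool at half the cores (minimum 1) trades some throughput for headroom. The same pattern in isolation, with a stand-in task:

from multiprocessing import Pool, cpu_count

def work(x):
    return x * x  # stand-in for the per-file merge

if __name__ == '__main__':
    n_workers = max(cpu_count() // 2, 1)  # never oversubscribe, never zero
    with Pool(n_workers) as pool:
        print(pool.map(work, range(8)))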
@@ -771,6 +841,47 @@

  return

+ def _merge_channels(src, plot=False):
+ """
+ Merge the channels in the given source directory and save the merged files in a 'stack' directory without using multiprocessing.
+ """
+
+ from .plot import plot_arrays
+
+ stack_dir = os.path.join(src, 'stack')
+ allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
+
+ # List directories that match the allowed names
+ chan_dirs = [d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d)) and d in allowed_names]
+ chan_dirs.sort()
+
+ print(f'List of folders in src: {chan_dirs}. Single channel folders.')
+ start_time = time.time()
+
+ # Assuming chan_dirs[0] is not empty and exists, adjust according to your logic
+ first_dir_path = os.path.join(src, chan_dirs[0])
+ dir_files = os.listdir(first_dir_path)
+
+ # Create the 'stack' directory if it doesn't exist
+ if not os.path.exists(stack_dir):
+ os.makedirs(stack_dir, exist_ok=True)
+ print(f'Generated folder with merged arrays: {stack_dir}')
+
+ if _is_dir_empty(stack_dir):
+ for file_name in dir_files:
+ full_file_path = os.path.join(first_dir_path, file_name)
+ if os.path.isfile(full_file_path):
+ _merge_file([os.path.join(src, d) for d in chan_dirs], stack_dir, file_name)
+
+ elapsed_time = time.time() - start_time
+ avg_time = elapsed_time / len(dir_files) if dir_files else 0
+ print(f'Average Time: {avg_time:.3f} sec, Total Elapsed Time: {elapsed_time:.3f} sec')
+
+ if plot:
+ plot_arrays(os.path.join(src, 'stack'))
+
+ return
+
  def _mip_all(src, include_first_chan=True):

  """
@@ -819,6 +930,7 @@ def _mip_all(src, include_first_chan=True):
  np.save(os.path.join(src, filename), concatenated)
  return

+ @log_function_call
  def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_size=100):
  """
  Concatenates channel data from multiple files and saves the concatenated data as numpy arrays.
@@ -853,7 +965,7 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
  array = np.take(array, channels, axis=2)
  stack_region.append(array)
  filenames_region.append(os.path.basename(path))
- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Region {i+1}/ {len(time_stack_path_lists)}', end='\r', flush=True)
  stack = np.stack(stack_region)
  save_loc = os.path.join(channel_stack_loc, f'{name}.npz')
@@ -879,7 +991,7 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
  array = np.take(array, channels, axis=2)
  stack_ls.append(array)
  filenames_batch.append(os.path.basename(path)) # store the filename
- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Concatenated: {i+1}/{nr_files} files')
  #print(f'Concatenated: {i+1}/{nr_files} files', end='\r', flush=True)

@@ -887,7 +999,7 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
  unique_shapes = {arr.shape[:-1] for arr in stack_ls}
  if len(unique_shapes) > 1:
  max_dims = np.max(np.array(list(unique_shapes)), axis=0)
- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Warning: arrays with multiple shapes found in batch {i+1}. Padding arrays to max X,Y dimentions {max_dims}')
  #print(f'Warning: arrays with multiple shapes found in batch {i+1}. Padding arrays to max X,Y dimentions {max_dims}', end='\r', flush=True)
  padded_stack_ls = []
@@ -1015,7 +1127,7 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
  duration = (stop - start)*single_channel.shape[0]
  time_ls.append(duration)
  average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Progress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
  #print(f'Progress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec', end='\r', flush=True)
  normalized_single_channel = exposure.rescale_intensity(arr_2d_normalized, out_range='dtype')
@@ -1072,8 +1184,6 @@ def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):

  print(f'\nSaved normalized stacks: {output_fldr}')

-
-
  def _create_movies_from_npy_per_channel(src, fps=10):
  """
  Create movies from numpy files per channel.
@@ -1125,9 +1235,33 @@ def _create_movies_from_npy_per_channel(src, fps=10):
  channel_save_path = os.path.join(save_path, f'{plate}_{well}_{field}_channel_{channel}.mp4')
  _npz_to_movie(normalized_channel_arrays_3d, filenames, channel_save_path, fps)

+ def delete_empty_subdirectories(folder_path):
+ """
+ Deletes all empty subdirectories in the specified folder.
+
+ Args:
+ - folder_path (str): The path to the folder in which to look for empty subdirectories.
+ """
+ # Check each item in the specified folder
+ for dirpath, dirnames, filenames in os.walk(folder_path, topdown=False):
+ # os.walk is used with topdown=False to start from the innermost directories and work upwards.
+ for dirname in dirnames:
+ # Construct the full path to the subdirectory
+ full_dir_path = os.path.join(dirpath, dirname)
+ # Try to remove the directory and catch any error (like if the directory is not empty)
+ try:
+ os.rmdir(full_dir_path)
+ print(f"Deleted empty directory: {full_dir_path}")
+ except OSError as e:
+ continue
+ # An error occurred, likely because the directory is not empty
+ #print(f"Skipping non-empty directory: {full_dir_path}")
+
+ @log_function_call
  def preprocess_img_data(settings):

  from .plot import plot_arrays, _plot_4D_arrays
+ from .utils import _run_test_mode

  """
  Preprocesses image data by converting z-stack images to maximum intensity projection (MIP) images.
@@ -1158,12 +1292,16 @@ def preprocess_img_data(settings):
  Returns:
  None
  """
+
  src = settings['src']
  valid_ext = ['tif', 'tiff', 'png', 'jpeg']
  files = os.listdir(src)
  extensions = [file.split('.')[-1] for file in files]
  extension_counts = Counter(extensions)
  most_common_extension = extension_counts.most_common(1)[0][0]
+ img_format = None
+
+ delete_empty_subdirectories(src)

  # Check if the most common extension is one of the specified image formats
  if most_common_extension in valid_ext:
@@ -1171,16 +1309,24 @@
  print(f'Found {extension_counts[most_common_extension]} {most_common_extension} files')
  else:
  print(f'Could not find any {valid_ext} files in {src} only found {extension_counts[0]}')
- return
-
+ if os.path.exists(src+'/stack'):
+ print('Found existing stack folder.')
+ if os.path.exists(src+'/channel_stack'):
+ print('Found existing channel_stack folder.')
+ if os.path.exists(src+'/norm_channel_stack'):
+ print('Found existing norm_channel_stack folder. Skipping preprocessing')
+ return settings, src
+
  cmap = 'inferno'
  figuresize = 20
  normalize = True
  save_dtype = 'uint16'
  correct_illumination = False

- mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
- backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
+ #mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
+ #backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
+ mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
+ backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]

  metadata_type = settings['metadata_type']
  custom_regex = settings['custom_regex']
@@ -1194,57 +1340,78 @@ def preprocess_img_data(settings):
  all_to_mip = settings['all_to_mip']
  pick_slice = settings['pick_slice']
  skip_mode = settings['skip_mode']
-
- if metadata_type == 'cellvoyager':
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'cq1':
- regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'nikon':
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'zeis':
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'leica':
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'custom':
- regex = f'({custom_regex}){img_format}'
+
+ if not img_format == None:
+ if metadata_type == 'cellvoyager':
+ regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'cq1':
+ regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'nikon':
+ regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'zeis':
+ regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'leica':
+ regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'custom':
+ regex = f'({custom_regex}){img_format}'

- print(f'regex mode:{metadata_type} regex:{regex}')
+ print(f'regex mode:{metadata_type} regex:{regex}')
+
+ if settings.get('test_mode', False):
+ print(f'Running spacr in test mode')
+ settings['plot'] = True
+ try:
+ os.rmdir(os.path.join(src, 'test'))
+ print(f"Deleted test directory: {os.path.join(src, 'test')}")
+ except OSError as e:
+ pass
+
+ src = _run_test_mode(settings['src'], regex, timelapse=timelapse)
+ settings['src'] = src
+
+ if img_format == None:
+ if not os.path.exists(src+'/stack'):
+ _merge_channels(src, plot=False)

  if not os.path.exists(src+'/stack'):
- if timelapse:
- _move_to_chan_folder(src, regex, timelapse, metadata_type)
- else:
- #_z_to_mip(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
- _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
-
- #Make sure no batches will be of only one image
- all_imgs = len(src+'/stack')
- full_batches = all_imgs // batch_size
- last_batch_size = all_imgs % batch_size
-
- # Check if the last batch is of size 1
- if last_batch_size == 1:
- # If there's only one batch and its size is 1, it's also an issue
- if full_batches == 0:
- raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
- # If the last batch is of size 1, merge it with the second last batch
- elif full_batches > 0:
- raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
-
- _merge_channels(src, plot=False)
- if timelapse:
- _create_movies_from_npy_per_channel(src+'/stack', fps=2)
-
- if plot:
- print(f'plotting {nr} images from {src}/stack')
- plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
- if all_to_mip:
- _mip_all(src+'/stack')
- if plot:
- print(f'plotting {nr} images from {src}/stack')
- plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
- #nr_of_stacks = len(src+'/channel_stack')
-
+ try:
+ if not img_format == None:
+ if timelapse:
+ _move_to_chan_folder(src, regex, timelapse, metadata_type)
+ else:
+ _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
+
+ #Make sure no batches will be of only one image
+ all_imgs = len(src+'/stack')
+ full_batches = all_imgs // batch_size
+ last_batch_size = all_imgs % batch_size
+
+ # Check if the last batch is of size 1
+ if last_batch_size == 1:
+ # If there's only one batch and its size is 1, it's also an issue
+ if full_batches == 0:
+ raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
+ # If the last batch is of size 1, merge it with the second last batch
+ elif full_batches > 0:
+ raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
+
+ _merge_channels(src, plot=False)
+
+ if timelapse:
+ _create_movies_from_npy_per_channel(src+'/stack', fps=2)
+
+ if plot:
+ print(f'plotting {nr} images from {src}/stack')
+ plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
+ if all_to_mip:
+ _mip_all(src+'/stack')
+ if plot:
+ print(f'plotting {nr} images from {src}/stack')
+ plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
+ except Exception as e:
+ print(f"Error: {e}")
+
+ print('concatinating cahnnels')
  _concatenate_channel(src+'/stack',
  channels=mask_channels,
  randomize=randomize,
@@ -1254,7 +1421,6 @@ def preprocess_img_data(settings):
  if plot:
  print(f'plotting {nr} images from {src}/channel_stack')
  _plot_4D_arrays(src+'/channel_stack', figuresize, cmap, nr_npz=1, nr=nr)
- nr_of_chan_stacks = len(src+'/channel_stack')

  backgrounds, signal_to_noise, signal_thresholds = _get_lists_for_normalization(settings=settings)

@@ -1273,7 +1439,7 @@ def preprocess_img_data(settings):
  if plot:
  _plot_4D_arrays(src+'/norm_channel_stack', nr_npz=1, nr=nr)

- return
+ return settings, src

  def _check_masks(batch, batch_filenames, output_folder):
  """
@@ -1295,8 +1461,7 @@ def _check_masks(batch, batch_filenames, output_folder):
  filtered_filenames = [f for f, exists in zip(batch_filenames, existing_files_mask) if exists]

  return np.array(filtered_batch), filtered_filenames
-
-
+
  def _get_avg_object_size(masks):
  """
  Calculate the average size of objects in a list of masks.
@@ -1450,6 +1615,56 @@ def _save_settings_to_db(settings):
  settings_df.to_sql('settings', conn, if_exists='replace', index=False) # Replace the table if it already exists
  conn.close()

+ def _save_mask_timelapse_as_gif_v1(masks, path, cmap, norm, filenames):
+ """
+ Save a timelapse of masks as a GIF.
+
+ Parameters:
+ masks (list): List of mask frames.
+ path (str): Path to save the GIF.
+ cmap: Colormap for displaying the masks.
+ norm: Normalization for the masks.
+ filenames (list): List of filenames corresponding to each mask frame.
+
+ Returns:
+ None
+ """
+ def _update(frame):
+ """
+ Update the plot with the given frame.
+
+ Parameters:
+ frame (int): The frame number to update the plot with.
+
+ Returns:
+ None
+ """
+ nonlocal filename_text_obj
+ if filename_text_obj is not None:
+ filename_text_obj.remove()
+ ax.clear()
+ ax.axis('off')
+ current_mask = masks[frame]
+ ax.imshow(current_mask, cmap=cmap, norm=norm)
+ ax.set_title(f'Frame: {frame}', fontsize=24, color='white')
+ filename_text = filenames[frame]
+ filename_text_obj = fig.text(0.5, 0.01, filename_text, ha='center', va='center', fontsize=20, color='white')
+ for label_value in np.unique(current_mask):
+ if label_value == 0: continue # Skip background
+ y, x = np.mean(np.where(current_mask == label_value), axis=1)
+ ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')
+
+ fig, ax = plt.subplots(figsize=(50, 50), facecolor='black')
+ ax.set_facecolor('black')
+ ax.axis('off')
+ plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
+
+ filename_text_obj = None
+ anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
+ anim.save(path, writer='pillow', fps=2, dpi=80) # Adjust DPI for size/quality
+ plt.close(fig)
+ print(f'Saved timelapse to {path}')
+
  def _save_mask_timelapse_as_gif(masks, tracks_df, path, cmap, norm, filenames):
  """
  Save a timelapse animation of masks as a GIF.
@@ -1504,9 +1719,10 @@ def _save_mask_timelapse_as_gif(masks, tracks_df, path, cmap, norm, filenames):
  ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')

  # Overlay tracks
- for track in tracks_df['track_id'].unique():
- _track = tracks_df[tracks_df['track_id'] == track]
- ax.plot(_track['x'], _track['y'], '-w', linewidth=1)
+ if tracks_df is not None:
+ for track in tracks_df['track_id'].unique():
+ _track = tracks_df[tracks_df['track_id'] == track]
+ ax.plot(_track['x'], _track['y'], '-w', linewidth=1)

  anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
  anim.save(path, writer='pillow', fps=2, dpi=80) # Adjust DPI for size/quality
@@ -1620,56 +1836,63 @@ def _load_and_concatenate_arrays(src, channels, cell_chann_dim, nucleus_chann_di

  # Iterate through each file in the reference folder
  for filename in os.listdir(reference_folder):
-
  stack_ls = []
- array_path = []
-
  if filename.endswith('.npy'):
- count+=1
- # Initialize the concatenated array with the array from the reference folder
- concatenated_array = np.load(os.path.join(reference_folder, filename))
- if channels is not None:
- concatenated_array = np.take(concatenated_array, channels, axis=2)
+ count += 1
+
+ # Check if this file exists in all the other specified folders
+ exists_in_all_folders = all(os.path.isfile(os.path.join(folder, filename)) for folder in folder_paths)
+
+ if exists_in_all_folders:
+ # Load and potentially modify the array from the reference folder
+ ref_array_path = os.path.join(reference_folder, filename)
+ concatenated_array = np.load(ref_array_path)
+
+ if channels is not None:
+ concatenated_array = np.take(concatenated_array, channels, axis=2)
+
+ # Add the array from the reference folder to 'stack_ls'
  stack_ls.append(concatenated_array)
- # For each of the other folders, load the array and concatenate it
- for folder in folder_paths[1:]:
- array_path = os.path.join(folder, filename)
- if os.path.isfile(array_path):
+
+ # For each of the other folders, load the array and add it to 'stack_ls'
+ for folder in folder_paths[1:]:
+ array_path = os.path.join(folder, filename)
  array = np.load(array_path)
  if array.ndim == 2:
- array = np.expand_dims(array, axis=-1) # add an extra dimension if the array is 2D
+ array = np.expand_dims(array, axis=-1) # Add an extra dimension if the array is 2D
  stack_ls.append(array)

- stack_ls = [np.expand_dims(arr, axis=-1) if arr.ndim == 2 else arr for arr in stack_ls]
- unique_shapes = {arr.shape[:-1] for arr in stack_ls}
- if len(unique_shapes) > 1:
- #max_dims = np.max(np.array(list(unique_shapes)), axis=0)
- # Determine the maximum length of tuples in unique_shapes
- max_tuple_length = max(len(shape) for shape in unique_shapes)
- # Pad shorter tuples with zeros to make them all the same length
- padded_shapes = [shape + (0,) * (max_tuple_length - len(shape)) for shape in unique_shapes]
- # Now create a NumPy array and find the maximum dimensions
- max_dims = np.max(np.array(padded_shapes), axis=0)
- clear_output(wait=True)
- print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}')
- #print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}', end='\r', flush=True)
- padded_stack_ls = []
- for arr in stack_ls:
- pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
- pad_width.append((0, 0))
- padded_arr = np.pad(arr, pad_width)
- padded_stack_ls.append(padded_arr)
- # Concatenate the padded arrays along the channel dimension (last dimension)
- stack = np.concatenate(padded_stack_ls, axis=-1)
+ if len(stack_ls) > 0:
+ stack_ls = [np.expand_dims(arr, axis=-1) if arr.ndim == 2 else arr for arr in stack_ls]
+ unique_shapes = {arr.shape[:-1] for arr in stack_ls}
+ if len(unique_shapes) > 1:
+ #max_dims = np.max(np.array(list(unique_shapes)), axis=0)
+ # Determine the maximum length of tuples in unique_shapes
+ max_tuple_length = max(len(shape) for shape in unique_shapes)
+ # Pad shorter tuples with zeros to make them all the same length
+ padded_shapes = [shape + (0,) * (max_tuple_length - len(shape)) for shape in unique_shapes]
+ # Now create a NumPy array and find the maximum dimensions
+ max_dims = np.max(np.array(padded_shapes), axis=0)
+ #clear_output(wait=True)
+ print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}')
+ #print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}', end='\r', flush=True)
+ padded_stack_ls = []
+ for arr in stack_ls:
+ pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
+ pad_width.append((0, 0))
+ padded_arr = np.pad(arr, pad_width)
+ padded_stack_ls.append(padded_arr)
+ # Concatenate the padded arrays along the channel dimension (last dimension)
+ stack = np.concatenate(padded_stack_ls, axis=-1)

- else:
- stack = np.concatenate(stack_ls, axis=-1)
+ else:
+ stack = np.concatenate(stack_ls, axis=-1)

- if stack.shape[-1] > concatenated_array.shape[-1]:
- output_path = os.path.join(output_folder, filename)
- np.save(output_path, stack)
+ if stack.shape[-1] > concatenated_array.shape[-1]:
+ output_path = os.path.join(output_folder, filename)
+ np.save(output_path, stack)

- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Files merged: {count}/{all_imgs}')
  #print(f'Files merged: {count}/{all_imgs}', end='\r', flush=True)
  return
@@ -2145,9 +2368,86 @@ def _read_mask(mask_path):
  if mask.dtype != np.uint16:
  mask = img_as_uint(mask)
  return mask
+
+
+ def convert_numpy_to_tiff(folder_path, limit=None):
+ """
+ Converts all numpy files in a folder to TIFF format and saves them in a subdirectory 'tiff'.

+ Args:
+ folder_path (str): The path to the folder containing numpy files.
+ """
+ # Create the subdirectory 'tiff' within the specified folder if it doesn't already exist
+ tiff_subdir = os.path.join(folder_path, 'tiff')
+ os.makedirs(tiff_subdir, exist_ok=True)
+
+ files = os.listdir(folder_path)
+
+ npy_files = [f for f in files if f.endswith('.npy')]

+ # Iterate over all files in the folder
+ for i, filename in enumerate(files):
+ if limit is not None and i >= limit:
+ break
+ if not filename.endswith('.npy'):
+ continue
+
+ # Construct the full file path
+ file_path = os.path.join(folder_path, filename)
+ # Load the numpy file
+ numpy_array = np.load(file_path)
+
+ # Construct the output TIFF file path
+ tiff_filename = os.path.splitext(filename)[0] + '.tif'
+ tiff_file_path = os.path.join(tiff_subdir, tiff_filename)
+
+ # Save the numpy array as a TIFF file
+ tifffile.imwrite(tiff_file_path, numpy_array)
+
+ print(f"Converted {filename} to {tiff_filename} and saved in 'tiff' subdirectory.")
+ return

+ def generate_cellpose_train_test(src, test_split=0.1):
+
+ mask_src = os.path.join(src, 'masks')
+ img_paths = glob.glob(os.path.join(src, '*.tif'))
+ img_filenames = [os.path.basename(file) for file in img_paths + img_paths]
+ img_filenames = [file for file in img_filenames if os.path.exists(os.path.join(mask_src, file))]
+ print(f'Found {len(img_filenames)} images with masks')
+
+ random.shuffle(img_filenames)
+ split_index = int(len(img_filenames) * test_split)
+ train_files = img_filenames[split_index:]
+ test_files = img_filenames[:split_index]
+ list_of_lists = [test_files, train_files]
+ print(f'Split dataset into Train {len(train_files)} and Test {len(test_files)} files')
+
+ train_dir = os.path.join(os.path.dirname(src), 'train')
+ train_dir_masks = os.path.join(train_dir, 'mask')
+ test_dir = os.path.join(os.path.dirname(src), 'test')
+ test_dir_masks = os.path.join(test_dir, 'mask')
+
+ os.makedirs(train_dir_masks, exist_ok=True)
+ os.makedirs(test_dir_masks, exist_ok=True)
+ for i, ls in enumerate(list_of_lists):
+
+ if i == 0:
+ dst = test_dir
+ dst_mask = test_dir_masks
+ _type = 'Test'
+ if i == 1:
+ dst = train_dir
+ dst_mask = train_dir_masks
+ _type = 'Train'
+
+ for idx, filename in enumerate(ls):
+ img_path = os.path.join(src, filename)
+ mask_path = os.path.join(mask_src, filename)
+ new_img_path = os.path.join(dst, filename)
+ new_mask_path = os.path.join(dst_mask, filename)
+ shutil.copy(img_path, new_img_path)
+ shutil.copy(mask_path, new_mask_path)
+ print(f'Copied {idx+1}/{len(ls)} images to {_type} set', end='\r', flush=True)
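The two helpers added at the end of the file cover Cellpose training-data preparation: convert_numpy_to_tiff rewrites .npy stacks as TIFFs via tifffile.imwrite, and generate_cellpose_train_test splits image/mask pairs into sibling train/ and test/ folders. A hedged usage sketch with hypothetical paths:

from spacr.io import convert_numpy_to_tiff, generate_cellpose_train_test

# /data/stacks holds .npy arrays; TIFFs land in /data/stacks/tiff
convert_numpy_to_tiff('/data/stacks', limit=10)

# /data/annotated holds *.tif images plus a 'masks' subfolder with
# identically named mask files; output goes to /data/train and /data/test
generate_cellpose_train_test('/data/annotated', test_split=0.1)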