spacr 0.4.12__py3-none-any.whl → 0.4.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/core.py +54 -8
- spacr/deep_spacr.py +2 -3
- spacr/gui_core.py +259 -75
- spacr/gui_elements.py +133 -2
- spacr/gui_utils.py +24 -20
- spacr/io.py +553 -61
- spacr/measure.py +11 -12
- spacr/ml.py +141 -258
- spacr/plot.py +76 -34
- spacr/sequencing.py +73 -38
- spacr/settings.py +160 -93
- spacr/submodules.py +620 -214
- spacr/timelapse.py +25 -25
- spacr/toxo.py +23 -23
- spacr/utils.py +249 -95
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/METADATA +2 -1
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/RECORD +21 -21
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/LICENSE +0 -0
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/WHEEL +0 -0
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/entry_points.txt +0 -0
- {spacr-0.4.12.dist-info → spacr-0.4.60.dist-info}/top_level.txt +0 -0
spacr/io.py
CHANGED
@@ -23,6 +23,7 @@ import seaborn as sns
 from nd2reader import ND2Reader
 from torchvision import transforms
 from sklearn.model_selection import train_test_split
+import readlif

 def process_non_tif_non_2D_images(folder):
     """Processes all images in the folder and splits them into grayscale channels, preserving bit depth."""
@@ -131,58 +132,61 @@ def process_non_tif_non_2D_images(folder):

 def _load_images_and_labels(image_files, label_files, invert=False):

-    from .utils import invert_image
+    from .utils import invert_image

     images = []
     labels = []
-
-    if not image_files is None:
-        image_names = sorted([os.path.basename(f) for f in image_files])
-    else:
-        image_names = []
-
-    if not label_files is None:
-        label_names = sorted([os.path.basename(f) for f in label_files])
-    else:
-        label_names = []

-
+    image_names = sorted([os.path.basename(f) for f in image_files]) if image_files else []
+    label_names = sorted([os.path.basename(f) for f in label_files]) if label_files else []
+
+    if image_files and label_files:
         for img_file, lbl_file in zip(image_files, label_files):
             image = cellpose.io.imread(img_file)
+            if image is None:
+                print(f"WARNING: Could not load image: {img_file}")
+                continue
             if invert:
                 image = invert_image(image)
-            label = cellpose.io.imread(lbl_file)
             if image.max() > 1:
                 image = image / image.max()
+
+            label = cellpose.io.imread(lbl_file)
+            if label is None:
+                print(f"WARNING: Could not load label: {lbl_file}")
+                continue
+
             images.append(image)
             labels.append(label)
-
+
+    elif image_files:
         for img_file in image_files:
            image = cellpose.io.imread(img_file)
+           if image is None:
+               print(f"WARNING: Could not load image: {img_file}")
+               continue
            if invert:
                image = invert_image(image)
            if image.max() > 1:
                image = image / image.max()
            images.append(image)
-
-
-
+
+    elif label_files:
+        for lbl_file in label_files:
+            label = cellpose.io.imread(lbl_file)
+            if label is None:
+                print(f"WARNING: Could not load label: {lbl_file}")
+                continue
            labels.append(label)
-
-
-
-
-        image_dir = None
-
-        if not label_files is None:
-            label_dir = os.path.dirname(label_files[0])
-        else:
-            label_dir = None
-
-    # Log the number of loaded images and labels
+
+    image_dir = os.path.dirname(image_files[0]) if image_files else None
+    label_dir = os.path.dirname(label_files[0]) if label_files else None
+
     print(f'Loaded {len(images)} images and {len(labels)} labels from {image_dir} and {label_dir}')
-    if
-    print(f'image shape: {images[0].shape}, image type: images[0].
+    if images and labels:
+        print(f'image shape: {images[0].shape}, image type: {images[0].dtype}; '
+              f'label shape: {labels[0].shape}, label type: {labels[0].dtype}')
+
     return images, labels, image_names, label_names

 def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None,
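The rewritten loader above now tolerates unreadable files instead of failing mid-loop. A minimal sketch of exercising it directly (hypothetical paths; the helper is module-internal to spacr.io):

from glob import glob
from spacr.io import _load_images_and_labels

image_files = sorted(glob('/data/train/images/*.tif'))  # hypothetical layout
label_files = sorted(glob('/data/train/masks/*.tif'))
images, labels, image_names, label_names = _load_images_and_labels(image_files, label_files, invert=False)
# Files that cellpose.io.imread cannot read are skipped with a WARNING rather than raising.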
@@ -647,7 +651,7 @@ def load_images_from_paths(images_by_key):

     return images_dict

-#@log_function_call
+#@log_function_call
 def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=False, skip_mode='01', metadata_type='', img_format='.tif'):
     """
     Convert z-stack images to maximum intensity projection (MIP) images.
@@ -664,13 +668,16 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
         None
     """

+    if isinstance(img_format, str):
+        img_format = [img_format]
+
     from .utils import _extract_filename_metadata, print_progress

     regular_expression = re.compile(regex)
     stack_path = os.path.join(src, 'stack')
     files_processed = 0
     if not os.path.exists(stack_path) or (os.path.isdir(stack_path) and len(os.listdir(stack_path)) == 0):
-        all_filenames = [filename for filename in os.listdir(src) if filename.endswith(img_format)]
+        all_filenames = [filename for filename in os.listdir(src) if any(filename.endswith(ext) for ext in img_format)]
         print(f'All files: {len(all_filenames)} in {src}')
         time_ls = []
         image_paths_by_key = _extract_filename_metadata(all_filenames, src, regular_expression, metadata_type, pick_slice, skip_mode)
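With the isinstance() guard added above, img_format can be either a single extension or a list of extensions. Sketch calls with hypothetical src and regex:

_rename_and_organize_image_files(src, regex, img_format='.tif')                      # single extension, old behaviour
_rename_and_organize_image_files(src, regex, img_format=['.tif', '.tiff', '.nd2'])   # several extensions at once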
@@ -729,11 +736,11 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
             images_by_key.clear()

     # Move original images to a new directory
-    valid_exts = [img_format]
     newpath = os.path.join(src, 'orig')
     os.makedirs(newpath, exist_ok=True)
     for filename in os.listdir(src):
-
+        #print(f"{filename}: {os.path.splitext(filename)[1]}")
+        if os.path.splitext(filename)[1] in img_format:
             move = os.path.join(newpath, filename)
             if os.path.exists(move):
                 print(f'WARNING: A file with the same name already exists at location {move}')
@@ -891,11 +898,16 @@ def _merge_channels(src, plot=False):
     from .utils import print_progress

     stack_dir = os.path.join(src, 'stack')
-    allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
+    #allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
+
+    string_list = [str(i) for i in range(101)]+[f"{i:02d}" for i in range(10)]
+    allowed_names = sorted(string_list, key=lambda x: int(x))

     # List directories that match the allowed names
     chan_dirs = [d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d)) and d in allowed_names]
     chan_dirs.sort()
+
+    num_matching_folders = len(chan_dirs)

     print(f'List of folders in src: {chan_dirs}. Single channel folders.')

@@ -925,7 +937,7 @@ def _merge_channels(src, plot=False):
     if plot:
         plot_arrays(os.path.join(src, 'stack'))

-    return
+    return num_matching_folders

 def _mip_all(src, include_first_chan=True):

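_merge_channels() now reports how many single-channel folders it merged; preprocess_img_data() uses this return value further down to reconcile the channel settings. Sketch with a hypothetical src:

nr_channel_folders = _merge_channels(src, plot=False)
print(f"Merged {nr_channel_folders} channel folders into {os.path.join(src, 'stack')}")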
@@ -1566,7 +1578,7 @@ def preprocess_img_data(settings):
     Returns:
         None
     """
-
+
     src = settings['src']
     valid_ext = ['tif', 'tiff', 'png', 'jpeg']
     files = os.listdir(src)
@@ -1584,10 +1596,6 @@ def preprocess_img_data(settings):
     else:
         print(f'Could not find any {valid_ext} files in {src} only found {extension_counts[0]}')

-
-
-
-
     if os.path.exists(os.path.join(src,'stack')):
         print('Found existing stack folder.')
         if os.path.exists(os.path.join(src,'channel_stack')):
@@ -1598,11 +1606,11 @@ def preprocess_img_data(settings):

     mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
     backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]
-
+
     settings, metadata_type, custom_regex, nr, plot, batch_size, timelapse, lower_percentile, randomize, all_to_mip, pick_slice, skip_mode, cmap, figuresize, normalize, save_dtype, test_mode, test_images, random_test = set_default_settings_preprocess_img_data(settings)
-
+
     regex = _get_regex(metadata_type, img_format, custom_regex)
-
+
     if test_mode:

         print(f'Running spacr in test mode')
@@ -1611,6 +1619,8 @@ def preprocess_img_data(settings):
                 os.rmdir(os.path.join(src, 'test'))
                 print(f"Deleted test directory: {os.path.join(src, 'test')}")
             except OSError as e:
+                print(f"Error deleting test directory: {e}")
+                print(f"Delete manually before running test mode")
                 pass

         src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
@@ -1627,6 +1637,7 @@ def preprocess_img_data(settings):
         if timelapse:
             _move_to_chan_folder(src, regex, timelapse, metadata_type)
         else:
+            img_format = ['.tif', '.tiff', '.png', '.jpg', '.jpeg', '.bmp', '.nd2', '.czi', '.lif']
             _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)

         #Make sure no batches will be of only one image
@@ -1644,7 +1655,13 @@ def preprocess_img_data(settings):
             print(f"all images: {all_imgs}, full batch: {full_batches}, last batch: {last_batch_size}")
             raise ValueError("Last batch of size 1 detected. Adjust the batch size.")

-        _merge_channels(src, plot=False)
+        nr_channel_folders = _merge_channels(src, plot=False)
+
+        if len(settings['channels']) != nr_channel_folders:
+            print(f"Number of channels does not match number of channel folders. channels: {settings['channels']} channel folders: {nr_channel_folders}")
+            new_channels = list(range(nr_channel_folders))
+            print(f"Changing channels from {settings['channels']} to {new_channels}")
+            settings['channels'] = new_channels

     if timelapse:
         _create_movies_from_npy_per_channel(stack_path, fps=2)
@@ -1773,11 +1790,11 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
        print(e)
    conn.close()
    if 'png_list' in dataframes:
-        png_list_df = dataframes['png_list'][['cell_id', 'png_path', '
+        png_list_df = dataframes['png_list'][['cell_id', 'png_path', 'plateID', 'rowID', 'columnID', 'fieldID']].copy()
        png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
        png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
        if 'cell' in dataframes:
-            join_cols = ['object_label', '
+            join_cols = ['object_label', 'plateID', 'rowID', 'columnID','fieldID']
            dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
        else:
            print("Cell table not found in database tables.")
@@ -2078,14 +2095,18 @@ def _read_db(db_loc, tables):
     Returns:
         - dfs (list): A list of pandas DataFrames, each containing the data from a table.
     """
-    from .utils import rename_columns_in_db
+    from .utils import rename_columns_in_db, correct_metadata
+
     rename_columns_in_db(db_loc)
     conn = sqlite3.connect(db_loc)
     dfs = []
+
     for table in tables:
         query = f'SELECT * FROM {table}'
         df = pd.read_sql_query(query, conn)
+        df = correct_metadata(df)
         dfs.append(df)
+
     conn.close()
     return dfs

@@ -2264,7 +2285,7 @@ def _copy_missclassified(df):

 def _read_db(db_loc, tables):

-    from .utils import rename_columns_in_db
+    from .utils import rename_columns_in_db, correct_metadata

     rename_columns_in_db(db_loc)
     conn = sqlite3.connect(db_loc) # Create a connection to the database
@@ -2272,12 +2293,13 @@ def _read_db(db_loc, tables):
     for table in tables:
         query = f'SELECT * FROM {table}' # Write a SQL query to get the data from the database
         df = pd.read_sql_query(query, conn) # Use the read_sql_query function to get the data and save it as a DataFrame
+        df = correct_metadata(df)
         dfs.append(df)
     conn.close() # Close the connection
     return dfs

 def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10, change_plate=False):
-
+
     from .utils import _split_data

     # Initialize an empty dictionary to store DataFrames by table name
@@ -2287,8 +2309,8 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     for idx, loc in enumerate(locs):
         db_dfs = _read_db(loc, tables)
         if change_plate:
-            db_dfs['
-            db_dfs['prc'] = db_dfs['
+            db_dfs['plateID'] = f'plate{idx+1}'
+            db_dfs['prc'] = db_dfs['plateID'].astype(str) + '_' + db_dfs['rowID'].astype(str) + '_' + db_dfs['columnID'].astype(str)
         for table, df in zip(tables, db_dfs):
             data_dict[table].append(df)

@@ -2296,6 +2318,7 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     for table, dfs in data_dict.items():
         if dfs:
             data_dict[table] = pd.concat(dfs, axis=0)
+
         if verbose:
             print(f"{table}: {len(data_dict[table])}")

@@ -2382,18 +2405,18 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     if 'png_list' in data_dict:
         png_list = data_dict['png_list'].copy()
         png_list_g_df_numeric, png_list_g_df_non_numeric = _split_data(png_list, 'prcfo', 'cell_id')
-        png_list_g_df_non_numeric.drop(columns=['
+        png_list_g_df_non_numeric.drop(columns=['plateID','rowID','columnID','fieldID','file_name','cell_id', 'prcf'], inplace=True)
         if verbose:
             print(f'png_list: {len(png_list)}, png_list grouped: {len(png_list_g_df_numeric)}')
             print(f"Added png_list columns: {png_list_g_df_numeric.columns}, {png_list_g_df_non_numeric.columns}")
         merged_df = merged_df.merge(png_list_g_df_numeric, left_index=True, right_index=True)
         merged_df = merged_df.merge(png_list_g_df_non_numeric, left_index=True, right_index=True)
-
+
     # Add prc (plate row column) and prcfo (plate row column field object) columns
-    metadata = metadata.assign(prc=lambda x: x['
+    metadata = metadata.assign(prc=lambda x: x['plateID'] + '_' + x['rowID'] + '_' + x['columnID'])
     cells_well = metadata.groupby('prc')['object_label'].nunique().reset_index(name='cells_per_well')
     metadata = metadata.merge(cells_well, on='prc')
-    metadata = metadata.assign(prcfo=lambda x: x['
+    metadata = metadata.assign(prcfo=lambda x: x['plateID'] + '_' + x['rowID'] + '_' + x['columnID'] + '_' + x['fieldID'] + '_' + x['object_label'])
     metadata.set_index('prcfo', inplace=True)

     # Merge metadata with final merged DataFrame
@@ -2981,7 +3004,7 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann

     return class_paths

-def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='
+def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='test', annotated_classes=(1, 2), metadata_type_by='columnID', class_metadata=['c1','c2']):
     all_paths = []

     # Connect to the database and retrieve the image paths and annotations
@@ -3003,9 +3026,9 @@ def training_dataset_from_annotation_metadata(db_path, dst, annotation_column='t

     # Filter all_paths by metadata_type_by and class_metadata
     filtered_paths = []
-    metadata_index = {'
+    metadata_index = {'rowID': 2, 'columnID': 3}.get(metadata_type_by, None)
     if metadata_index is None:
-        raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be '
+        raise ValueError(f"Invalid metadata_type_by value: {metadata_type_by}. Must be 'rowID' or 'columnID'. {class_metadata} must be a list formatted as ['c1', 'c2'] or ['r1', 'r2']")

     for row in all_paths:
         if row[metadata_index] in class_metadata:
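The widened signature above keys the metadata filter on 'rowID' or 'columnID'. A sketch of a call that keeps only images from columns c1 and c2 (paths are hypothetical):

training_dataset_from_annotation_metadata('/data/measurements.db', '/data/training_set',
                                           annotation_column='test', annotated_classes=(1, 2),
                                           metadata_type_by='columnID', class_metadata=['c1', 'c2'])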
@@ -3095,4 +3118,473 @@ def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
         test_class_dir = os.path.join(dst, f'test/{cls}')
         print(f'Train class {cls}: {len(os.listdir(train_class_dir))}, Test class {cls}: {len(os.listdir(test_class_dir))}')

-    return os.path.join(dst, 'train'), os.path.join(dst, 'test')
+    return os.path.join(dst, 'train'), os.path.join(dst, 'test')
+
+def convert_separate_files_to_yokogawa(folder, regex):
+
+    ROWS = "ABCDEFGHIJKLMNOP"
+    COLS = [f"{i:02d}" for i in range(1, 25)]
+    WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+    def _get_next_well(used_wells):
+        plate = 1
+        for well in WELLS:
+            well_name = f"plate{plate}_{well}"
+            if well_name not in used_wells:
+                return well_name
+            if well == "P24":
+                plate += 1
+        return f"plate{plate}_A01"
+
+    pattern = re.compile(regex, re.I)
+
+    files_by_region = {}
+    rename_log = []
+    csv_path = os.path.join(folder, "rename_log.csv")
+    used_wells = set()
+    region_to_well = {}
+
+    # Group files by (plateID, wellID, fieldID, timeID, chanID)
+    for file in os.listdir(folder):
+        match = pattern.match(file)
+        if not match:
+            print(f"Skipping {file}: does not match regex.")
+            continue
+
+        meta = match.groupdict()
+
+        # Mandatory metadata
+        if 'wellID' not in meta or meta['wellID'] is None:
+            print(f"Skipping {file}: missing mandatory wellID.")
+            continue
+        wellID = meta['wellID']
+
+        # Optional metadata with defaults
+        plateID = meta.get('plateID', '1') or '1'
+        fieldID = meta.get('fieldID', '1') or '1'
+        timeID = int(meta.get('timeID', 1) or 1)
+        chanID = int(meta.get('chanID', 1) or 1)
+        sliceID = meta.get('sliceID')
+        sliceID = int(sliceID) if sliceID is not None else None
+
+        region_key = (plateID, wellID, fieldID, timeID, chanID)
+
+        files_by_region.setdefault(region_key, []).append((file, sliceID))
+
+    # Assign wells and process files per region
+    for region, file_list in files_by_region.items():
+        if region[:3] not in region_to_well:
+            next_well = _get_next_well(used_wells)
+            region_to_well[region[:3]] = next_well
+            used_wells.add(next_well)
+
+        assigned_well = region_to_well[region[:3]]
+        plateID, wellID, fieldID, timeID, chanID = region
+
+        # Check if multiple slices exist and are meaningful
+        slice_ids = [sid for _, sid in file_list if sid is not None]
+        unique_slices = set(slice_ids)
+
+        images = []
+        for filename, _ in sorted(file_list, key=lambda x: x[1] or 1):
+            img = tifffile.imread(os.path.join(folder, filename))
+            images.append(img)
+
+        # Perform MIP only if multiple unique slices are present
+        if len(unique_slices) > 1:
+            img_to_save = np.max(np.stack(images), axis=0)
+        else:
+            img_to_save = images[0]
+
+        dtype = img_to_save.dtype
+
+        new_filename = f"{assigned_well}_T{timeID:04d}F{int(fieldID):03d}L01C{chanID:02d}.tif"
+        new_filepath = os.path.join(folder, new_filename)
+        tifffile.imwrite(new_filepath, img_to_save.astype(dtype))
+
+        # Log original filenames involved in MIP or single file rename
+        original_files = ";".join(f[0] for f in file_list)
+        rename_log.append({"Original File(s)": original_files, "Renamed TIFF": new_filename})
+
+    pd.DataFrame(rename_log).to_csv(csv_path, index=False)
+    print(f"Processing complete. Files saved in {folder} and rename log saved as {csv_path}.")
+
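convert_separate_files_to_yokogawa() expects a regex with named groups; only wellID is mandatory, while plateID, fieldID, timeID, chanID and sliceID fall back to defaults. A sketch assuming a hypothetical filename scheme such as exp1_A02_f003_t01_z05_c2.tif:

regex = r'(?P<plateID>.+)_(?P<wellID>[A-P]\d{2})_f(?P<fieldID>\d+)_t(?P<timeID>\d+)_z(?P<sliceID>\d+)_c(?P<chanID>\d+)\.tif'
convert_separate_files_to_yokogawa('/data/raw_exports', regex)
# Slices sharing (plateID, wellID, fieldID, timeID, chanID) are MIP-projected and written as
# plate<N>_<well>_T####F###L01C##.tif; rename_log.csv records the mapping.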
+def convert_to_yokogawa(folder):
+    """
+    Detects file type in the folder and converts them
+    to Yokogawa-style naming with Maximum Intensity Projection (MIP).
+    """
+
+    #def _get_next_well(used_wells):
+    #    """
+    #    Determines the next available well position in a 384-well format.
+    #    Iterates wells, and after P24, switches to plate2.
+    #    """
+    #    plate = 1
+    #    for well in WELLS:
+    #        well_name = f"plate{plate}_{well}"
+    #        if well_name not in used_wells:
+    #            used_wells.add(well_name)
+    #            return well_name
+    #        if well == "P24":
+    #            plate += 1
+    #    raise ValueError("All wells exhausted.")
+
+    def _get_next_well(used_wells):
+        """
+        Determines the next available well position across multiple 384-well plates.
+        """
+        ROWS = "ABCDEFGHIJKLMNOP"
+        COLS = [f"{i:02d}" for i in range(1, 25)]
+        WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+        plate = 1
+        while True:
+            for well in WELLS:
+                well_name = f"plate{plate}_{well}"
+                if well_name not in used_wells:
+                    used_wells.add(well_name)
+                    return well_name
+            plate += 1  # All wells exhausted in current plate, increment to next plate
+
+
+    # Define 384-well plate format
+    ROWS = "ABCDEFGHIJKLMNOP"
+    COLS = [f"{i:02d}" for i in range(1, 25)]
+    WELLS = [f"{r}{c}" for r in ROWS for c in COLS]
+
+    filenames = []
+    rename_log = []
+    csv_path = os.path.join(folder, "rename_log.csv")
+    used_wells = set()
+
+    # **Dictionary to store well assignments per original file**
+    file_to_well = {}
+
+    for file in os.listdir(folder):
+        path = os.path.join(folder, file)
+        ext = file.lower().split('.')[-1]
+
+        # **Assign a well only once per original file**
+        if file not in file_to_well:
+            file_to_well[file] = _get_next_well(used_wells)
+            #used_wells.add(file_to_well[file])  # Mark it as used
+
+        well = file_to_well[file]  # Use the same well for all channels/times
+
+        ### **Process Nikon ND2 Files**
+        if ext == 'nd2':
+            try:
+                nd2 = ND2Reader(path)
+                metadata = nd2.metadata
+
+                timepoints = list(range(len(metadata.get("frames", [0])))) or [0]
+                fields = list(range(len(metadata.get("fields_of_view", [0])))) or [0]
+                z_levels = list(metadata.get("z_levels", range(1))) if metadata.get("z_levels") else [0]
+                channels = metadata.get("channels", [])
+
+                for t_idx in timepoints:
+                    for f_idx in fields:
+                        for c_idx, channel in enumerate(channels):
+                            try:
+                                mip_image = np.max.reduce([
+                                    nd2.get_frame_2D(t=t_idx, v=f_idx, z=z_idx, c=c_idx)
+                                    for z_idx in z_levels
+                                ], axis=0)
+
+                                dtype = mip_image.dtype
+                                filename = f"{well}_T{t_idx+1:04d}F{f_idx+1:03d}L01C{c_idx+1:02d}.tif"
+                                filepath = os.path.join(folder, filename)
+
+                                tifffile.imwrite(filepath, mip_image.astype(dtype))
+                                rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+                            except IndexError:
+                                print(f"Warning: ND2 file {file} has an incomplete data structure. Skipping.")
+
+            except Exception as e:
+                print(f"Error processing ND2 file {file}: {e}")
+
+        ### **Process Zeiss CZI Files**
+        elif ext == 'czi':
+            with czifile.CziFile(path) as czi:
+                img_data = czi.asarray()  # Read the full image array
+
+                # Remove singleton dimensions (if any)
+                img_data = np.squeeze(img_data)
+
+                # Get the actual shape of the data
+                shape = img_data.shape
+                num_dims = len(shape)
+
+                # Default values if dimensions are missing
+                timepoints = 1
+                z_levels = 1
+                channels = 1
+
+                # Determine dimension mapping dynamically
+                if num_dims == 2:  # (Y, X) → Single 2D image
+                    y_dim, x_dim = shape
+                    img_data = img_data.reshape(1, 1, 1, y_dim, x_dim)  # Add missing dimensions
+                elif num_dims == 3:  # (C, Y, X) or (Z, Y, X)
+                    if shape[0] <= 4:  # Likely (C, Y, X)
+                        channels, y_dim, x_dim = shape
+                        img_data = img_data.reshape(1, 1, channels, y_dim, x_dim)  # Add missing dimensions
+                    else:  # Likely (Z, Y, X)
+                        z_levels, y_dim, x_dim = shape
+                        img_data = img_data.reshape(1, z_levels, 1, y_dim, x_dim)  # Add missing dimensions
+                elif num_dims == 4:  # Could be (T, C, Y, X) or (T, Z, Y, X) or (Z, C, Y, X)
+                    if shape[1] <= 4:  # Assume (T, C, Y, X)
+                        timepoints, channels, y_dim, x_dim = shape
+                        img_data = img_data.reshape(timepoints, 1, channels, y_dim, x_dim)  # Add missing Z
+                    else:  # Assume (T, Z, Y, X) or (Z, C, Y, X)
+                        timepoints, z_levels, y_dim, x_dim = shape
+                        img_data = img_data.reshape(timepoints, z_levels, 1, y_dim, x_dim)  # Add missing C
+                elif num_dims == 5:  # Standard (T, Z, C, Y, X)
+                    timepoints, z_levels, channels, y_dim, x_dim = shape
+                else:
+                    raise ValueError(f"Unexpected CZI shape: {shape}. Unable to process.")
+
+                # Iterate over detected timepoints, channels, and perform MIP over Z
+                for t_idx in range(timepoints):
+                    for c_idx in range(channels):
+                        # Extract Z-stack or single image
+                        if z_levels > 1:
+                            z_stack = img_data[t_idx, :, c_idx, :, :]  # MIP over Z
+                            mip_image = np.max(z_stack, axis=0)
+                        else:
+                            mip_image = img_data[t_idx, 0, c_idx, :, :]  # No Z, take directly
+
+                        # Ensure correct dtype
+                        dtype = mip_image.dtype
+
+                        # Generate Yokogawa-style filename
+                        filename = f"{well}_T{t_idx+1:04d}F001L01C{c_idx+1:02d}.tif"
+                        filepath = os.path.join(folder, filename)
+
+                        # Save the extracted image
+                        tifffile.imwrite(filepath, mip_image.astype(dtype))
+
+                        rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+        ### **Process Leica LIF Files**
+        elif ext == 'lif':
+            try:
+                lif_file = readlif.Reader(path)
+
+                for image_idx, image in enumerate(lif_file.getIterImage()):
+                    timepoints = range(getattr(image.dims, 't', 1))
+                    z_levels = range(getattr(image.dims, 'z', 1))
+                    channels = range(getattr(image.dims, 'c', 1))
+
+                    for t_idx in timepoints:
+                        for c_idx in channels:
+                            z_stack = []
+                            for z_idx in z_levels:
+                                try:
+                                    frame = image.getFrame(z=z_idx, t=t_idx, c=c_idx)
+                                    z_stack.append(frame)
+                                except IndexError:
+                                    print(f"Missing frame: T{t_idx}, Z{z_idx}, C{c_idx} in {file}, skipping frame.")
+
+                            if z_stack:
+                                mip_image = np.max(np.stack(z_stack), axis=0)
+                                dtype = mip_image.dtype
+                                filename = f"{well}_T{t_idx+1:04d}F{image_idx+1:03d}L01C{c_idx+1:02d}.tif"
+                                filepath = os.path.join(folder, filename)
+
+                                tifffile.imwrite(filepath, mip_image.astype(dtype))
+                                rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+            except Exception as e:
+                print(f"Error processing LIF file {file}: {e}")
+
+        ### **Process Standard Image Files (TIFF, PNG, JPEG, BMP)**
+        elif ext in ['tif', 'tiff', 'png', 'jpg', 'jpeg', 'bmp'] and not file.startswith("plate"):
+            try:
+                with tifffile.TiffFile(path) as tif:
+                    images = tif.asarray()
+                    ndim = images.ndim
+
+                    # Defaults
+                    t_dim = z_dim = c_dim = 1
+
+                    # Determine dimensions more explicitly
+                    if ndim == 2:
+                        mip_image = images
+                        filename = f"{well}_T0001F001L01C01.tif"
+                        tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                        rename_log.append({"Original File": file, "Renamed TIFF": filename})
+                        continue
+
+                    elif ndim == 3:
+                        if images.shape[0] <= 4:  # Likely channels
+                            c_dim = images.shape[0]
+                            for c in range(c_dim):
+                                mip_image = images[c, :, :]
+                                filename = f"{well}_T0001F001L01C{c+1:02d}.tif"
+                                tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                                rename_log.append({"Original File": file, "Renamed TIFF": filename})
+                        else:  # Z-stack
+                            mip_image = np.max(images, axis=0)
+                            filename = f"{well}_T0001F001L01C01.tif"
+                            tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                            rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+                    elif ndim == 4:
+                        t_dim, z_dim, y_dim, x_dim = images.shape
+                        for t in range(t_dim):
+                            mip_image = np.max(images[t, :, :, :], axis=0)
+                            filename = f"{well}_T{t+1:04d}F001L01C01.tif"
+                            tifffile.imwrite(os.path.join(folder, filename), mip_image)
+                            rename_log.append({"Original File": file, "Renamed TIFF": filename})
+
+                    else:
+                        raise ValueError(f"Unsupported TIFF dimensions: {images.shape}")
+
+            except Exception as e:
+                print(f"Error processing standard image file {file}: {e}")
+
+
+    # Save rename log as CSV
+    pd.DataFrame(rename_log).to_csv(csv_path, index=False)
+    print(f"Processing complete. Files saved in {folder} and rename log saved as {csv_path}.")
+
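convert_to_yokogawa() needs no regex: it dispatches on the file extension, assigns each original file its own well, and writes MIP TIFFs next to the sources. Sketch with a hypothetical folder:

from spacr.io import convert_to_yokogawa
convert_to_yokogawa('/data/mixed_microscope_exports')
# ND2, CZI, LIF and standard raster inputs come out as plate<N>_<well>_T####F###L01C##.tif,
# and rename_log.csv maps each output back to its original file.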
+def apply_augmentation(image, method):
+    if method == 'rotate90':
+        return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
+    elif method == 'rotate180':
+        return cv2.rotate(image, cv2.ROTATE_180)
+    elif method == 'rotate270':
+        return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
+    elif method == 'flip_h':
+        return cv2.flip(image, 1)
+    elif method == 'flip_v':
+        return cv2.flip(image, 0)
+    return image
+
+def process_instruction(entry):
+    img = tifffile.imread(entry["src_img"])
+    msk = tifffile.imread(entry["src_msk"])
+    if entry["augment"]:
+        img = apply_augmentation(img, entry["augment"])
+        msk = apply_augmentation(msk, entry["augment"])
+    tifffile.imwrite(entry["dst_img"], img)
+    tifffile.imwrite(entry["dst_msk"], msk)
+    return 1
+
+def prepare_cellpose_dataset(input_root, augment_data=False, train_fraction=0.8, n_jobs=None):
+
+    from .utils import print_progress
+
+    time_ls = []
+    input_root = os.path.abspath(input_root)
+    output_root = os.path.join(input_root, "cellpose_dataset")
+
+    def get_augmentations():
+        return ['rotate90', 'rotate180', 'rotate270', 'flip_h', 'flip_v']
+
+    def find_image_mask_pairs(dataset_path):
+        mask_dir = os.path.join(dataset_path, "masks")
+        pairs = []
+        for fname in os.listdir(dataset_path):
+            if fname.lower().endswith((".tif", ".tiff")):
+                img_path = os.path.join(dataset_path, fname)
+                msk_path = os.path.join(mask_dir, fname)
+                if os.path.isfile(msk_path):
+                    pairs.append((img_path, msk_path))
+        return pairs
+
+    def prepare_output_folders(base):
+        for subset in ["train", "test"]:
+            os.makedirs(os.path.join(base, subset, "images"), exist_ok=True)
+            os.makedirs(os.path.join(base, subset, "masks"), exist_ok=True)
+
+    print("Scanning datasets...")
+    datasets = []
+    for subdir in os.listdir(input_root):
+        dataset_path = os.path.join(input_root, subdir)
+        if os.path.isdir(dataset_path) and os.path.isdir(os.path.join(dataset_path, "masks")):
+            pairs = find_image_mask_pairs(dataset_path)
+            if pairs:
+                datasets.append(pairs)
+                print(f" Found {len(pairs)} images in {dataset_path}")
+
+    if not datasets:
+        raise ValueError("No valid datasets with images and masks found.")
+
+    prepare_output_folders(output_root)
+
+    min_size = min(len(pairs) for pairs in datasets)
+    target_size = min_size if not augment_data else max(len(pairs) for pairs in datasets)
+
+    print("\nPreparing instruction list...")
+    instructions = []
+    global_index = 0
+
+    for pairs in datasets:
+        dataset_len = len(pairs)
+
+        # --- Step 1: Sample or augment ---
+        sampled_pairs = []
+        if dataset_len >= target_size:
+            sampled_pairs = random.sample(pairs, target_size)
+        else:
+            sampled_pairs = pairs.copy()
+            if augment_data:
+                needed = target_size - dataset_len
+                aug_methods = get_augmentations()
+                full_loops = needed // len(aug_methods)
+                extra = needed % len(aug_methods)
+
+                for _ in range(full_loops):
+                    for (img_path, msk_path), aug in zip(pairs, aug_methods * (dataset_len // len(aug_methods))):
+                        sampled_pairs.append((img_path, msk_path, aug))
+                if extra > 0:
+                    subset = random.sample(pairs * ((extra // len(aug_methods)) + 1), extra)
+                    for (img_path, msk_path), aug in zip(subset, aug_methods[:extra]):
+                        sampled_pairs.append((img_path, msk_path, aug))
+
+        # Add "no augmentation" tag to original files
+        augmented_sampled = [
+            (tup[0], tup[1], None) if len(tup) == 2 else tup
+            for tup in sampled_pairs
+        ]
+
+        # --- Step 2: Split into train/test ---
+        random.shuffle(augmented_sampled)
+        split_idx = int(train_fraction * len(augmented_sampled))
+        split_sets = {
+            "train": augmented_sampled[:split_idx],
+            "test": augmented_sampled[split_idx:]
+        }
+
+        for subset, items in split_sets.items():
+            for img_path, msk_path, aug in items:
+                dst_img = os.path.join(output_root, subset, "images", f"{global_index:05d}.tif")
+                dst_msk = os.path.join(output_root, subset, "masks", f"{global_index:05d}.tif")
+                instructions.append({
+                    "src_img": img_path,
+                    "src_msk": msk_path,
+                    "dst_img": dst_img,
+                    "dst_msk": dst_msk,
+                    "augment": aug
+                })
+                global_index += 1
+
+    print(f"Total files to process: {len(instructions)}")
+
+    # --- Step 3: Process with multiprocessing ---
+    print("Processing images with multiprocessing...")
+
+    if n_jobs is None:
+        n_jobs = max(1, cpu_count() - 1)
+    else:
+        n_jobs = int(n_jobs)
+
+    with Pool(n_jobs) as pool:
+        for i, _ in enumerate(pool.imap_unordered(process_instruction, instructions), 1):
+            print_progress(i, len(instructions), n_jobs=n_jobs, time_ls=time_ls, batch_size=None, operation_type="cellpose dataset")
+
+    print(f"Done. Dataset saved to: {output_root}")
+
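prepare_cellpose_dataset() pools every subfolder of input_root that holds TIFF images plus a masks/ subfolder, optionally balances the datasets through augmentation, and writes a train/test split under input_root/cellpose_dataset. Sketch with hypothetical paths:

from spacr.io import prepare_cellpose_dataset
prepare_cellpose_dataset('/data/cellpose_sources', augment_data=True, train_fraction=0.8, n_jobs=4)
# Output: /data/cellpose_sources/cellpose_dataset/{train,test}/{images,masks}, files numbered 00000.tif, 00001.tif, ...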