spacr 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/io.py CHANGED
@@ -22,6 +22,7 @@ from torchvision.transforms import ToTensor
  import seaborn as sns
  from nd2reader import ND2Reader
  from torchvision import transforms
+ from sklearn.model_selection import train_test_split

  def process_non_tif_non_2D_images(folder):
  """Processes all images in the folder and splits them into grayscale channels, preserving bit depth."""
@@ -984,47 +985,6 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
  shutil.move(os.path.join(src, filename), move)
  return

- def _merge_channels_v2(src, plot=False):
- from .plot import plot_arrays
- """
- Merge the channels in the given source directory and save the merged files in a 'stack' directory.
-
- Args:
- src (str): The path to the source directory containing the channel folders.
- plot (bool, optional): Whether to plot the merged arrays. Defaults to False.
-
- Returns:
- None
- """
- src = Path(src)
- stack_dir = src / 'stack'
- chan_dirs = [d for d in src.iterdir() if d.is_dir() and d.name in ['01', '02', '03', '04', '00', '1', '2', '3', '4','0']]
-
- chan_dirs.sort(key=lambda x: x.name)
- print(f'List of folders in src: {[d.name for d in chan_dirs]}. Single channel folders.')
- start_time = time.time()
-
- # First directory and its files
- dir_files = list(chan_dirs[0].iterdir())
-
- # Create the 'stack' directory if it doesn't exist
- stack_dir.mkdir(exist_ok=True)
- print(f'generated folder with merged arrays: {stack_dir}')
-
- if _is_dir_empty(stack_dir):
- with Pool(max(cpu_count() // 2, 1)) as pool:
- #with Pool(cpu_count()) as pool:
- merge_func = partial(_merge_file, chan_dirs, stack_dir)
- pool.map(merge_func, dir_files)
-
- avg_time = (time.time() - start_time) / len(dir_files)
- print(f'Average Time: {avg_time:.3f} sec')
-
- if plot:
- plot_arrays(src+'/stack')
-
- return
-
  def _merge_channels(src, plot=False):
  """
  Merge the channels in the given source directory and save the merged files in a 'stack' directory without using multiprocessing.
@@ -2384,12 +2344,8 @@ def _results_to_csv(src, df, df_well):
  wells.to_csv(wells_loc, index=True, header=True)
  cells.to_csv(cells_loc, index=True, header=True)
  return cells, wells
-
- ###################################################
- # Classify
- ###################################################

- def read_plot_model_stats(file_path ,save=False):
+ def read_plot_model_stats(train_file_path, val_file_path ,save=False):

  def _plot_and_save(train_df, val_df, column='accuracy', save=False, path=None, dpi=600):

@@ -2418,37 +2374,19 @@ def read_plot_model_stats(file_path ,save=False):
  plt.savefig(pdf_path, format='pdf', dpi=dpi)
  else:
  plt.show()
- # Read the CSV into a dataframe
- df = pd.read_csv(file_path, index_col=0)
-
- # Split the dataframe into train and validation based on the index
- train_df = df.filter(like='_train', axis=0).copy()
- val_df = df.filter(like='_val', axis=0).copy()
-
- fldr_1 = os.path.dirname(file_path)
-
- train_csv_path = os.path.join(fldr_1, 'train.csv')
- val_csv_path = os.path.join(fldr_1, 'validation.csv')

- fldr_2 = os.path.dirname(fldr_1)
- fldr_3 = os.path.dirname(fldr_2)
- bn_1 = os.path.basename(fldr_1)
- bn_2 = os.path.basename(fldr_2)
- bn_3 = os.path.basename(fldr_3)
- model_name = str(f'{bn_1}_{bn_2}_{bn_3}')
+ # Read the CSVs into DataFrames
+ train_df = pd.read_csv(train_file_path, index_col=0)
+ val_df = pd.read_csv(val_file_path, index_col=0)

- # Extract epochs from index
- train_df['epoch'] = [int(idx.split('_')[0]) for idx in train_df.index]
- val_df['epoch'] = [int(idx.split('_')[0]) for idx in val_df.index]
-
- # Save dataframes to a CSV file
- train_df.to_csv(train_csv_path)
- val_df.to_csv(val_csv_path)
+ # Get the folder path for saving plots
+ fldr_1 = os.path.dirname(train_file_path)

  if save:
  # Setting the style
  sns.set(style="whitegrid")

+ # Plot and save the results
  _plot_and_save(train_df, val_df, column='accuracy', save=save, path=fldr_1)
  _plot_and_save(train_df, val_df, column='neg_accuracy', save=save, path=fldr_1)
  _plot_and_save(train_df, val_df, column='pos_accuracy', save=save, path=fldr_1)
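
Note: read_plot_model_stats now takes two CSV paths (training and validation stats) rather than splitting a single file on its index. A minimal usage sketch, assuming hypothetical paths to the train.csv and validation.csv files that _save_progress (further down) writes into its destination folder:

    from spacr.io import read_plot_model_stats

    # Hypothetical paths; _save_progress writes these two files into its dst folder
    train_csv = '/data/experiment1/model/train.csv'
    val_csv = '/data/experiment1/model/validation.csv'

    # Reads both CSVs and plots the per-epoch stats; with save=True the plots are
    # written as PDFs into the folder that contains train.csv
    read_plot_model_stats(train_csv, val_csv, save=True)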
@@ -2496,50 +2434,53 @@ def _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_

  return model_path

- def _save_progress(dst, results_df, result_type='train'):
+ def _save_progress(dst, train_df, validation_df):
  """
  Save the progress of the classification model.

  Parameters:
  dst (str): The destination directory to save the progress.
- results_df (pandas.DataFrame): The DataFrame containing accuracy, loss, and PRAUC.
- train_metrics_df (pandas.DataFrame): The DataFrame containing training metrics.
+ train_df (pandas.DataFrame): The DataFrame containing training stats.
+ validation_df (pandas.DataFrame): The DataFrame containing validation stats (if available).

  Returns:
  None
  """
+
+ def _save_df_to_csv(file_path, df):
+ """
+ Save the given DataFrame to the specified CSV file, either creating a new file or appending to an existing one.
+
+ Parameters:
+ file_path (str): The file path where the CSV will be saved.
+ df (pandas.DataFrame): The DataFrame to save.
+ """
+ if not os.path.exists(file_path):
+ with open(file_path, 'w') as f:
+ df.to_csv(f, index=True, header=True)
+ f.flush() # Ensure data is written to the file system
+ else:
+ with open(file_path, 'a') as f:
+ df.to_csv(f, index=True, header=False)
+ f.flush()
+
  # Save accuracy, loss, PRAUC
  os.makedirs(dst, exist_ok=True)
- results_path = os.path.join(dst, f'{result_type}.csv')
- if not os.path.exists(results_path):
- results_df.to_csv(results_path, index=True, header=True, mode='w')
- else:
- results_df.to_csv(results_path, index=True, header=False, mode='a')
+ results_path_train = os.path.join(dst, 'train.csv')
+ results_path_validation = os.path.join(dst, 'validation.csv')

- if result_type == 'train':
- read_plot_model_stats(results_path, save=True)
- return
+ # Save training data
+ _save_df_to_csv(results_path_train, train_df)

- def _save_settings(settings, src):
- """
- Save the settings dictionary to a CSV file.
+ # Save validation data if available
+ if validation_df is not None:
+ _save_df_to_csv(results_path_validation, validation_df)

- Parameters:
- - settings (dict): A dictionary containing the settings.
- - src (str): The source directory where the settings file will be saved.
+ # Call read_plot_model_stats after ensuring the files are saved
+ read_plot_model_stats(results_path_train, results_path_validation, save=True)

- Returns:
- None
- """
- dst = os.path.join(src,'model')
- settings_loc = os.path.join(dst,'settings.csv')
- os.makedirs(dst, exist_ok=True)
- settings_df = pd.DataFrame(list(settings.items()), columns=['setting_key', 'setting_value'])
- display(settings_df)
- settings_df.to_csv(settings_loc, index=False)
  return

-
  def _copy_missclassified(df):
  misclassified = df[df['true_label'] != df['predicted_label']]
  for _, row in misclassified.iterrows():
@@ -2869,7 +2810,8 @@ def generate_dataset(settings={}):
  all_paths = []
  for i, src in enumerate(settings['src']):
  db_path = os.path.join(src, 'measurements', 'measurements.db')
- dst = os.path.join(src, 'datasets')
+ if i == 0:
+ dst = os.path.join(src, 'datasets')
  paths = generate_path_list_from_db(db_path, file_metadata=settings['file_metadata'])
  correct_paths(paths, src)
  all_paths.extend(paths)
@@ -2917,10 +2859,12 @@ def generate_dataset(settings={}):

  # Combine the temporary tar files into a final tar
  date_name = datetime.date.today().strftime('%y%m%d')
- if not settings['file_metadata'] is None:
- tar_name = f"{date_name}_{settings['experiment']}_{settings['file_metadata']}.tar"
- else:
- tar_name = f"{date_name}_{settings['experiment']}.tar"
+ if len(settings['src']) > 1:
+ date_name = f"{date_name}_combined"
+ #if not settings['file_metadata'] is None:
+ # tar_name = f"{date_name}_{settings['experiment']}_{settings['file_metadata']}.tar"
+ #else:
+ tar_name = f"{date_name}_{settings['experiment']}.tar"
  tar_name = os.path.join(dst, tar_name)
  if os.path.exists(tar_name):
  number = random.randint(1, 100)
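
Note: generate_dataset now takes a list of source folders; the tar archive is written under the first source's datasets folder, and the date prefix gains a "_combined" suffix when more than one source is given. A hedged sketch of settings that would exercise this path (the paths and experiment label are made up, and the remaining generate_dataset settings are omitted):

    from spacr.io import generate_dataset

    settings = {
        'src': ['/data/plate1', '/data/plate2'],  # dst is derived from the first entry
        'experiment': 'screen1',                  # tar becomes e.g. <yymmdd>_combined_screen1.tar
        'file_metadata': None,
    }
    generate_dataset(settings)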
@@ -2967,7 +2911,6 @@ def generate_loaders(src, mode='train', image_size=224, batch_size=32, classes=[
  - val_loaders (list): List of data loaders for validation datasets.
  """

- from .io import spacrDataset
  from .utils import SelectChannels, augment_dataset

  chans = []
@@ -3066,10 +3009,6 @@ def generate_loaders(src, mode='train', image_size=224, batch_size=32, classes=[

  def generate_training_dataset(settings):

- from .io import _read_and_merge_data, _read_db
- from .utils import get_paths_from_db, annotate_conditions, save_settings
- from .settings import set_generate_training_dataset_defaults
-
  # Function to filter png_list_df by prcfo present in df without merging
  def filter_png_list(db_path, settings):
  tables = ['cell', 'nucleus', 'pathogen', 'cytoplasm']
@@ -3173,34 +3112,55 @@ def generate_training_dataset(settings):
  class_paths_ls[i] = random.sample(class_paths, size)

  return class_paths_ls
+
+ from .io import _read_and_merge_data, _read_db
+ from .utils import get_paths_from_db, annotate_conditions, save_settings
+ from .settings import set_generate_training_dataset_defaults

  # Set default settings and save
  settings = set_generate_training_dataset_defaults(settings)
  save_settings(settings, 'cv_dataset', show=True)

- db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
- dst = os.path.join(settings['src'], 'datasets', 'training')
+ class_path_list = None

- # Create a new directory for training data if necessary
- if os.path.exists(dst):
- for i in range(1, 100000):
- dst = os.path.join(settings['src'], 'datasets', f'training_{i}')
- if not os.path.exists(dst):
- print(f'Creating new directory for training: {dst}')
- break
+ if isinstance(settings['src'], str):
+ src = [settings['src']]

- # Select dataset based on dataset mode
- if settings['dataset_mode'] == 'annotation':
- class_paths_ls = annotation_based_selection(db_path, dst, settings)
+ for i, src in enumerate(settings['src']):
+ db_path = os.path.join(src, 'measurements', 'measurements.db')
+
+ if len(settings['src']) > 1 and i == 0:
+ dst = os.path.join(src, 'datasets', 'training_all')
+ elif len(settings['src']) == 1:
+ dst = os.path.join(src, 'datasets', 'training')
+
+ # Create a new directory for training data if necessary
+ if os.path.exists(dst):
+ for i in range(1, 100000):
+ dst = dst + f'_{i}'
+ if not os.path.exists(dst):
+ print(f'Creating new directory for training: {dst}')
+ break

- elif settings['dataset_mode'] == 'metadata':
- class_paths_ls = metadata_based_selection(db_path, settings)
+ # Select dataset based on dataset mode
+ if settings['dataset_mode'] == 'annotation':
+ class_paths_ls = annotation_based_selection(db_path, dst, settings)
+
+ elif settings['dataset_mode'] == 'metadata':
+ class_paths_ls = metadata_based_selection(db_path, settings)
+
+ elif settings['dataset_mode'] == 'measurement':
+ class_paths_ls = measurement_based_selection(settings, db_path)
+
+ if class_path_list is None:
+ class_path_list = [[] for _ in range(len(class_paths_ls))]

- elif settings['dataset_mode'] == 'measurement':
- class_paths_ls = measurement_based_selection(settings, db_path)
+ # Extend each list in class_path_list with the corresponding list from class_paths_ls
+ for idx in range(len(class_paths_ls)):
+ class_path_list[idx].extend(class_paths_ls[idx])

  # Generate and return training and testing directories
- train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_paths_ls, classes=settings['classes'], test_split=settings['test_split'])
+ train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_path_list, classes=settings['classes'], test_split=settings['test_split'])

  return train_class_dir, test_class_dir

@@ -3234,7 +3194,6 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann

  def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
  from .utils import print_progress
- from .deep_spacr import train_test_split
  # Make sure that the length of class_data matches the length of classes
  if len(class_data) != len(classes):
  raise ValueError("class_data and classes must have the same length.")
spacr/measure.py CHANGED
@@ -652,43 +652,6 @@ def img_list_to_grid(grid, titles=None):
  plt.tight_layout(pad=0.1)
  return fig

- def filepaths_to_database(img_paths, settings, source_folder, crop_mode):
- from. utils import _map_wells_png
- png_df = pd.DataFrame(img_paths, columns=['png_path'])
-
- png_df['file_name'] = png_df['png_path'].apply(lambda x: os.path.basename(x))
-
- parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=settings['timelapse'])))
-
- columns = ['plate', 'row', 'col', 'field']
-
- if settings['timelapse']:
- columns = columns + ['time_id']
-
- columns = columns + ['prcfo']
-
- if crop_mode == 'cell':
- columns = columns + ['cell_id']
-
- if crop_mode == 'nucleus':
- columns = columns + ['nucleus_id']
-
- if crop_mode == 'pathogen':
- columns = columns + ['pathogen_id']
-
- if crop_mode == 'cytoplasm':
- columns = columns + ['cytoplasm_id']
-
- png_df[columns] = parts
-
- try:
- conn = sqlite3.connect(f'{source_folder}/measurements/measurements.db', timeout=5)
- png_df.to_sql('png_list', conn, if_exists='append', index=False)
- conn.commit()
- except sqlite3.OperationalError as e:
- print(f"SQLite error: {e}", flush=True)
- traceback.print_exc()
-
  #@log_function_call
  def _measure_crop_core(index, time_ls, file, settings):

@@ -711,7 +674,7 @@ def _measure_crop_core(index, time_ls, file, settings):
  """

  from .plot import _plot_cropped_arrays
- from .utils import _merge_overlapping_objects, _filter_object, _relabel_parent_with_child_labels, _exclude_objects, normalize_to_dtype
+ from .utils import _merge_overlapping_objects, _filter_object, _relabel_parent_with_child_labels, _exclude_objects, normalize_to_dtype, filepaths_to_database
  from .utils import _merge_and_save_to_database, _crop_center, _find_bounding_box, _generate_names, _get_percentiles

  figs = {}
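
Note: filepaths_to_database is removed from spacr/measure.py and is now imported from .utils inside _measure_crop_core, so the helper appears to have moved to spacr/utils.py. A hedged import sketch for callers, assuming the function keeps the signature shown in the removed code above:

    from spacr.utils import filepaths_to_database

    # Same call shape as the removed measure.py version:
    # filepaths_to_database(img_paths, settings, source_folder, crop_mode)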