spacr 0.0.81__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/utils.py CHANGED
@@ -90,130 +90,6 @@ def check_mask_folder(src,mask_fldr):
90
90
  else:
91
91
  return True
92
92
 
93
- def set_default_plot_merge_settings():
94
- settings = {}
95
- settings.setdefault('include_noninfected', True)
96
- settings.setdefault('include_multiinfected', True)
97
- settings.setdefault('include_multinucleated', True)
98
- settings.setdefault('remove_background', False)
99
- settings.setdefault('filter_min_max', None)
100
- settings.setdefault('channel_dims', [0,1,2,3])
101
- settings.setdefault('backgrounds', [100,100,100,100])
102
- settings.setdefault('cell_mask_dim', 4)
103
- settings.setdefault('nucleus_mask_dim', 5)
104
- settings.setdefault('pathogen_mask_dim', 6)
105
- settings.setdefault('outline_thickness', 3)
106
- settings.setdefault('outline_color', 'gbr')
107
- settings.setdefault('overlay_chans', [1,2,3])
108
- settings.setdefault('overlay', True)
109
- settings.setdefault('normalization_percentiles', [2,98])
110
- settings.setdefault('normalize', True)
111
- settings.setdefault('print_object_number', True)
112
- settings.setdefault('nr', 1)
113
- settings.setdefault('figuresize', 50)
114
- settings.setdefault('cmap', 'inferno')
115
- settings.setdefault('verbose', True)
116
-
117
- return settings
118
-
119
- def set_default_settings_preprocess_generate_masks(src, settings={}):
120
- # Main settings
121
- settings['src'] = src
122
- settings.setdefault('preprocess', True)
123
- settings.setdefault('masks', True)
124
- settings.setdefault('save', True)
125
- settings.setdefault('batch_size', 50)
126
- settings.setdefault('test_mode', False)
127
- settings.setdefault('test_images', 10)
128
- settings.setdefault('magnification', 20)
129
- settings.setdefault('custom_regex', None)
130
- settings.setdefault('metadata_type', 'cellvoyager')
131
- settings.setdefault('workers', os.cpu_count()-4)
132
- settings.setdefault('randomize', True)
133
- settings.setdefault('verbose', True)
134
-
135
- settings.setdefault('remove_background_cell', False)
136
- settings.setdefault('remove_background_nucleus', False)
137
- settings.setdefault('remove_background_pathogen', False)
138
-
139
- # Channel settings
140
- settings.setdefault('cell_channel', None)
141
- settings.setdefault('nucleus_channel', None)
142
- settings.setdefault('pathogen_channel', None)
143
- settings.setdefault('channels', [0,1,2,3])
144
- settings.setdefault('pathogen_background', 100)
145
- settings.setdefault('pathogen_Signal_to_noise', 10)
146
- settings.setdefault('pathogen_CP_prob', 0)
147
- settings.setdefault('cell_background', 100)
148
- settings.setdefault('cell_Signal_to_noise', 10)
149
- settings.setdefault('cell_CP_prob', 0)
150
- settings.setdefault('nucleus_background', 100)
151
- settings.setdefault('nucleus_Signal_to_noise', 10)
152
- settings.setdefault('nucleus_CP_prob', 0)
153
-
154
- settings.setdefault('nucleus_FT', 100)
155
- settings.setdefault('cell_FT', 100)
156
- settings.setdefault('pathogen_FT', 100)
157
-
158
- # Plot settings
159
- settings.setdefault('plot', False)
160
- settings.setdefault('figuresize', 50)
161
- settings.setdefault('cmap', 'inferno')
162
- settings.setdefault('normalize', True)
163
- settings.setdefault('normalize_plots', True)
164
- settings.setdefault('examples_to_plot', 1)
165
-
166
- # Analasys settings
167
- settings.setdefault('pathogen_model', None)
168
- settings.setdefault('merge_pathogens', False)
169
- settings.setdefault('filter', False)
170
- settings.setdefault('lower_percentile', 2)
171
-
172
- # Timelapse settings
173
- settings.setdefault('timelapse', False)
174
- settings.setdefault('fps', 2)
175
- settings.setdefault('timelapse_displacement', None)
176
- settings.setdefault('timelapse_memory', 3)
177
- settings.setdefault('timelapse_frame_limits', None)
178
- settings.setdefault('timelapse_remove_transient', False)
179
- settings.setdefault('timelapse_mode', 'trackpy')
180
- settings.setdefault('timelapse_objects', 'cells')
181
-
182
- # Misc settings
183
- settings.setdefault('all_to_mip', False)
184
- settings.setdefault('pick_slice', False)
185
- settings.setdefault('skip_mode', '01')
186
- settings.setdefault('upscale', False)
187
- settings.setdefault('upscale_factor', 2.0)
188
- settings.setdefault('adjust_cells', False)
189
-
190
- return settings
191
-
192
- def set_default_settings_preprocess_img_data(settings):
193
-
194
- metadata_type = settings.setdefault('metadata_type', 'cellvoyager')
195
- custom_regex = settings.setdefault('custom_regex', None)
196
- nr = settings.setdefault('nr', 1)
197
- plot = settings.setdefault('plot', True)
198
- batch_size = settings.setdefault('batch_size', 50)
199
- timelapse = settings.setdefault('timelapse', False)
200
- lower_percentile = settings.setdefault('lower_percentile', 2)
201
- randomize = settings.setdefault('randomize', True)
202
- all_to_mip = settings.setdefault('all_to_mip', False)
203
- pick_slice = settings.setdefault('pick_slice', False)
204
- skip_mode = settings.setdefault('skip_mode', False)
205
-
206
- cmap = settings.setdefault('cmap', 'inferno')
207
- figuresize = settings.setdefault('figuresize', 50)
208
- normalize = settings.setdefault('normalize', True)
209
- save_dtype = settings.setdefault('save_dtype', 'uint16')
210
-
211
- test_mode = settings.setdefault('test_mode', False)
212
- test_images = settings.setdefault('test_images', 10)
213
- random_test = settings.setdefault('random_test', True)
214
-
215
- return settings, metadata_type, custom_regex, nr, plot, batch_size, timelapse, lower_percentile, randomize, all_to_mip, pick_slice, skip_mode, cmap, figuresize, normalize, save_dtype, test_mode, test_images, random_test
216
-
217
93
  def smooth_hull_lines(cluster_data):
218
94
  hull = ConvexHull(cluster_data)
219
95
 
@@ -3631,122 +3507,6 @@ def correct_paths(df, base_path):
3631
3507
  image_paths = df['png_path'].to_list()
3632
3508
  return df, image_paths
3633
3509
 
3634
- def get_umap_image_settings(settings={}):
3635
- settings.setdefault('src', 'path')
3636
- settings.setdefault('row_limit', 1000)
3637
- settings.setdefault('tables', ['cell', 'cytoplasm', 'nucleus', 'pathogen'])
3638
- settings.setdefault('visualize', 'cell')
3639
- settings.setdefault('image_nr', 16)
3640
- settings.setdefault('dot_size', 50)
3641
- settings.setdefault('n_neighbors', 1000)
3642
- settings.setdefault('min_dist', 0.1)
3643
- settings.setdefault('metric', 'euclidean')
3644
- settings.setdefault('eps', 0.5)
3645
- settings.setdefault('min_samples', 1000)
3646
- settings.setdefault('filter_by', 'channel_0')
3647
- settings.setdefault('img_zoom', 0.5)
3648
- settings.setdefault('plot_by_cluster', True)
3649
- settings.setdefault('plot_cluster_grids', True)
3650
- settings.setdefault('remove_cluster_noise', True)
3651
- settings.setdefault('remove_highly_correlated', True)
3652
- settings.setdefault('log_data', False)
3653
- settings.setdefault('figuresize', 60)
3654
- settings.setdefault('black_background', True)
3655
- settings.setdefault('remove_image_canvas', False)
3656
- settings.setdefault('plot_outlines', True)
3657
- settings.setdefault('plot_points', True)
3658
- settings.setdefault('smooth_lines', True)
3659
- settings.setdefault('clustering', 'dbscan')
3660
- settings.setdefault('exclude', None)
3661
- settings.setdefault('col_to_compare', 'col')
3662
- settings.setdefault('pos', 'c1')
3663
- settings.setdefault('neg', 'c2')
3664
- settings.setdefault('embedding_by_controls', False)
3665
- settings.setdefault('plot_images', True)
3666
- settings.setdefault('reduction_method','umap')
3667
- settings.setdefault('save_figure', False)
3668
- settings.setdefault('n_jobs', -1)
3669
- settings.setdefault('color_by', None)
3670
- settings.setdefault('neg', 'c1')
3671
- settings.setdefault('pos', 'c2')
3672
- settings.setdefault('mix', 'c3')
3673
- settings.setdefault('mix', 'c3')
3674
- settings.setdefault('exclude_conditions', None)
3675
- settings.setdefault('analyze_clusters', False)
3676
- settings.setdefault('resnet_features', False)
3677
- settings.setdefault('verbose',True)
3678
- return settings
3679
-
3680
- def get_measure_crop_settings(settings):
3681
-
3682
- # Test mode
3683
- settings.setdefault('test_mode', False)
3684
- settings.setdefault('test_nr', 10)
3685
-
3686
- #measurement settings
3687
- settings.setdefault('save_measurements',True)
3688
- settings.setdefault('radial_dist', True)
3689
- settings.setdefault('calculate_correlation', True)
3690
- settings.setdefault('manders_thresholds', [15,85,95])
3691
- settings.setdefault('homogeneity', True)
3692
- settings.setdefault('homogeneity_distances', [8,16,32])
3693
-
3694
- # Cropping settings
3695
- settings.setdefault('save_arrays', False)
3696
- settings.setdefault('save_png',True)
3697
- settings.setdefault('use_bounding_box',False)
3698
- settings.setdefault('png_size',[224,224])
3699
- settings.setdefault('png_dims',[0,1,2])
3700
- settings.setdefault('normalize',False)
3701
- settings.setdefault('normalize_by','png')
3702
- settings.setdefault('crop_mode',['cell'])
3703
- settings.setdefault('dialate_pngs', False)
3704
- settings.setdefault('dialate_png_ratios', [0.2])
3705
-
3706
- # Timelapsed settings
3707
- settings.setdefault('timelapse', False)
3708
- settings.setdefault('timelapse_objects', 'cell')
3709
-
3710
- # Operational settings
3711
- settings.setdefault('plot',False)
3712
- settings.setdefault('plot_filtration',False)
3713
- settings.setdefault('representative_images', False)
3714
- settings.setdefault('max_workers', os.cpu_count()-2)
3715
-
3716
- # Object settings
3717
- settings.setdefault('cell_mask_dim',None)
3718
- settings.setdefault('nucleus_mask_dim',None)
3719
- settings.setdefault('pathogen_mask_dim',None)
3720
- settings.setdefault('cytoplasm',False)
3721
- settings.setdefault('include_uninfected',True)
3722
- settings.setdefault('cell_min_size',0)
3723
- settings.setdefault('nucleus_min_size',0)
3724
- settings.setdefault('pathogen_min_size',0)
3725
- settings.setdefault('cytoplasm_min_size',0)
3726
- settings.setdefault('merge_edge_pathogen_cells', True)
3727
-
3728
- # Miscellaneous settings
3729
- settings.setdefault('experiment', 'exp')
3730
- settings.setdefault('cells', 'HeLa')
3731
- settings.setdefault('cell_loc', None)
3732
- settings.setdefault('pathogens', ['ME49Dku80WT', 'ME49Dku80dgra8:GRA8', 'ME49Dku80dgra8', 'ME49Dku80TKO'])
3733
- settings.setdefault('pathogen_loc', [['c1', 'c2', 'c3', 'c4', 'c5', 'c6'], ['c7', 'c8', 'c9', 'c10', 'c11', 'c12'], ['c13', 'c14', 'c15', 'c16', 'c17', 'c18'], ['c19', 'c20', 'c21', 'c22', 'c23', 'c24']])
3734
- settings.setdefault('treatments', ['BR1', 'BR2', 'BR3'])
3735
- settings.setdefault('treatment_loc', [['c1', 'c2', 'c7', 'c8', 'c13', 'c14', 'c19', 'c20'], ['c3', 'c4', 'c9', 'c10', 'c15', 'c16', 'c21', 'c22'], ['c5', 'c6', 'c11', 'c12', 'c17', 'c18', 'c23', 'c24']])
3736
- settings.setdefault('channel_of_interest', 2)
3737
- settings.setdefault('compartments', ['pathogen', 'cytoplasm'])
3738
- settings.setdefault('measurement', 'mean_intensity')
3739
- settings.setdefault('nr_imgs', 32)
3740
- settings.setdefault('um_per_pixel', 0.1)
3741
-
3742
- if settings['test_mode']:
3743
- settings['plot'] = True
3744
- settings['plot_filtration'] = True
3745
- test_imgs = settings['test_nr']
3746
- print(f'Test mode enabled with {test_imgs} images, plotting set to True')
3747
-
3748
- return settings
3749
-
3750
3510
  def delete_folder(folder_path):
3751
3511
  if os.path.exists(folder_path) and os.path.isdir(folder_path):
3752
3512
  for root, dirs, files in os.walk(folder_path, topdown=False):
@@ -3833,7 +3593,57 @@ def preprocess_data(df, filter_by, remove_highly_correlated, log_data, exclude):
3833
3593
 
3834
3594
  return numeric_data
3835
3595
 
3836
- def filter_dataframe_features(df, channel_of_interest, exclude=None):
3596
+ def remove_low_variance_columns(df, threshold=0.01, verbose=False):
3597
+ """
3598
+ Removes columns from the dataframe that have low variance.
3599
+
3600
+ Parameters:
3601
+ df (pandas.DataFrame): The DataFrame containing the data.
3602
+ threshold (float): The variance threshold below which columns will be removed.
3603
+
3604
+ Returns:
3605
+ pandas.DataFrame: The DataFrame with low variance columns removed.
3606
+ """
3607
+
3608
+ numerical_cols = df.select_dtypes(include=[np.number])
3609
+ low_variance_cols = numerical_cols.var()[numerical_cols.var() < threshold].index.tolist()
3610
+
3611
+ if verbose:
3612
+ print(f"Removed columns due to low variance: {low_variance_cols}")
3613
+
3614
+ df = df.drop(columns=low_variance_cols)
3615
+
3616
+ return df
3617
+
3618
+ def remove_highly_correlated_columns(df, threshold=0.95, verbose=False):
3619
+ """
3620
+ Removes columns from the dataframe that are highly correlated with one another.
3621
+
3622
+ Parameters:
3623
+ df (pandas.DataFrame): The DataFrame containing the data.
3624
+ threshold (float): The correlation threshold above which columns will be removed.
3625
+
3626
+ Returns:
3627
+ pandas.DataFrame: The DataFrame with highly correlated columns removed.
3628
+ """
3629
+ numerical_cols = df.select_dtypes(include=[np.number])
3630
+ corr_matrix = numerical_cols.corr().abs()
3631
+
3632
+ # Upper triangle of the correlation matrix
3633
+ upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
3634
+
3635
+ # Find columns with correlation greater than the threshold
3636
+ to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
3637
+
3638
+ if verbose:
3639
+ print(f"Removed columns due to high correlation: {to_drop}")
3640
+
3641
+ df = df.drop(columns=to_drop)
3642
+
3643
+ return df
3644
+
3645
+ def filter_dataframe_features(df, channel_of_interest, exclude=None, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
3646
+
3837
3647
  """
3838
3648
  Filter the dataframe `df` based on the specified `channel_of_interest` and `exclude` parameters.
3839
3649
 
@@ -3847,43 +3657,54 @@ def filter_dataframe_features(df, channel_of_interest, exclude=None):
3847
3657
  - features (list): The list of selected features after filtering.
3848
3658
 
3849
3659
  """
3850
- if channel_of_interest is None:
3851
- feature_string = None
3852
- elif channel_of_interest == 'morphology':
3853
- feature_string = 'morphology'
3854
- elif isinstance(channel_of_interest, list):
3855
- feature_string = []
3856
- for i in channel_of_interest:
3857
- feature_string_tmp = f'channel_{i}'
3858
- feature_string.append(feature_string_tmp)
3859
- elif isinstance(channel_of_interest, int):
3860
- feature_string = f'channel_{channel_of_interest}'
3861
- elif isinstance(channel_of_interest, str):
3862
- feature_string = channel_of_interest
3863
3660
 
3864
- # Remove columns with a single value
3865
- df = df.loc[:, df.nunique() > 1]
3866
-
3867
- # Select numerical features
3868
- features = df.select_dtypes(include=[np.number]).columns.tolist()
3869
-
3870
- if feature_string is not None:
3871
- feature_list = ['channel_0', 'channel_1', 'channel_2', 'channel_3']
3661
+ count_and_id_columns = [col for col in df.columns if '_id' in col or 'count' in col]
3662
+ if 'pathogen_pathogen' in df.columns:
3663
+ count_and_id_columns.append('pathogen_pathogen')
3664
+ if 'cell_cell' in df.columns:
3665
+ count_and_id_columns.append('cell_cell')
3666
+ if 'nucleus_nucleus' in df.columns:
3667
+ count_and_id_columns.append('nucleus_nucleus')
3668
+ if 'cytoplasm_cytoplasm' in df.columns:
3669
+ count_and_id_columns.append('cytoplasm_cytoplasm')
3670
+
3671
+ if verbose:
3672
+ print("Columns to remove:", count_and_id_columns)
3673
+ df = df.drop(columns=count_and_id_columns)
3674
+
3675
+ if not channel_of_interest is None:
3676
+ drop_columns = ['channel_1', 'channel_2', 'channel_3', 'channel_4']
3677
+
3678
+ if isinstance(channel_of_interest, list):
3679
+ feature_strings = [f"channel_{channel}" for channel in channel_of_interest]
3680
+
3681
+ elif isinstance(channel_of_interest, int):
3682
+ feature_string = f"channel_{channel_of_interest}"
3683
+ feature_strings = [feature_string]
3684
+ elif channel_of_interest == 'morphology':
3685
+ morphological_features = ['area', 'area_bbox', 'major_axis_length', 'minor_axis_length', 'eccentricity', 'extent', 'perimeter', 'euler_number', 'solidity', 'zernike_0', 'zernike_1', 'zernike_2', 'zernike_3', 'zernike_4', 'zernike_5', 'zernike_6', 'zernike_7', 'zernike_8', 'zernike_9', 'zernike_10', 'zernike_11', 'zernike_12', 'zernike_13', 'zernike_14', 'zernike_15', 'zernike_16', 'zernike_17', 'zernike_18', 'zernike_19', 'zernike_20', 'zernike_21', 'zernike_22', 'zernike_23', 'zernike_24', 'area_filled', 'convex_area', 'equivalent_diameter_area', 'feret_diameter_max']
3686
+ morphological_columns = [item for item in df.columns.tolist() if any(base in item for base in morphological_features)]
3687
+ columns_to_drop = [col for col in df.columns if col not in morphological_columns]
3688
+
3689
+ if channel_of_interest != 'morphology':
3690
+ # Remove entries from drop_columns that are also in feature_strings
3691
+ drop_columns = [col for col in drop_columns if col not in feature_strings]
3872
3692
 
3873
- # Remove feature_string from the list if it exists
3874
- if isinstance(feature_string, str):
3875
- if feature_string in feature_list:
3876
- feature_list.remove(feature_string)
3877
- elif isinstance(feature_string, list):
3878
- feature_list = [feature for feature in feature_list if feature not in feature_string]
3693
+ # Remove columns from the DataFrame that contain any entry from drop_columns in the column name
3694
+ columns_to_drop = [col for col in df.columns if any(drop_col in col for drop_col in drop_columns) or all(fs not in col for fs in feature_strings)]
3879
3695
 
3880
- if feature_string != 'morphology':
3881
- features = [feature for feature in features if feature_string in feature]
3696
+ df = df.drop(columns=columns_to_drop)
3697
+ if verbose:
3698
+ print(f"Removed columns: {columns_to_drop}")
3699
+
3700
+ if remove_low_variance_features:
3701
+ df = remove_low_variance_columns(df, threshold=0.01, verbose=verbose)
3702
+
3703
+ if remove_highly_correlated_features:
3704
+ df = remove_highly_correlated_columns(df, threshold=0.95, verbose=verbose)
3882
3705
 
3883
- # Iterate through the list and remove columns from df
3884
- for feature_ in feature_list:
3885
- features = [feature for feature in features if feature_ not in feature]
3886
- print(f'After removing {feature_} features: {len(features)}')
3706
+ # Select numerical features
3707
+ features = df.select_dtypes(include=[np.number]).columns.tolist()
3887
3708
 
3888
3709
  if isinstance(exclude, list):
3889
3710
  features = [feature for feature in features if feature not in exclude]
@@ -4292,4 +4113,220 @@ def process_masks(mask_folder, image_folder, channel, batch_size=50, n_clusters=
4292
4113
  largest_cluster_label = np.bincount(batch_labels).argmax()
4293
4114
  cleaned_mask = remove_objects_not_in_largest_cluster(mask, batch_labels, largest_cluster_label)
4294
4115
  np.save(mask_files[i], cleaned_mask)
4295
- label_index += len(batch_properties)
4116
+ label_index += len(batch_properties)
4117
+
4118
+ def merge_regression_res_with_metadata(results_file, metadata_file, name='_metadata'):
4119
+ # Read the CSV files into dataframes
4120
+ df_results = pd.read_csv(results_file)
4121
+ df_metadata = pd.read_csv(metadata_file)
4122
+
4123
+ def extract_and_clean_gene(feature):
4124
+ # Extract the part between '[' and ']'
4125
+ match = re.search(r'\[(.*?)\]', feature)
4126
+ if match:
4127
+ gene = match.group(1)
4128
+ # Remove 'T.' if present
4129
+ gene = re.sub(r'^T\.', '', gene)
4130
+ # Remove everything after and including '_'
4131
+ gene = gene.split('_')[0]
4132
+ return gene
4133
+ return None
4134
+
4135
+ # Apply the function to the feature column
4136
+ df_results['gene'] = df_results['feature'].apply(extract_and_clean_gene)
4137
+
4138
+ df_metadata['gene'] = df_metadata['Gene ID'].apply(lambda x: x.split('_')[1] if '_' in x else None)
4139
+
4140
+ # Drop rows where gene extraction failed
4141
+ df_results = df_results.dropna(subset=['gene'])
4142
+
4143
+ # Merge the two dataframes on the gene column
4144
+ merged_df = pd.merge(df_results, df_metadata, on='gene')
4145
+
4146
+ # Generate the new file name
4147
+ base, ext = os.path.splitext(results_file)
4148
+ new_file = f"{base}{name}{ext}"
4149
+
4150
+ # Save the merged dataframe to the new file
4151
+ merged_df.to_csv(new_file, index=False)
4152
+
4153
+ return merged_df
4154
+
4155
+ def process_vision_results(df, threshold=0.5):
4156
+
4157
+ # Split the 'path' column using _map_wells function
4158
+ mapped_values = df['path'].apply(lambda x: _map_wells(x))
4159
+
4160
+ df['plate'] = mapped_values.apply(lambda x: x[0])
4161
+ df['row'] = mapped_values.apply(lambda x: x[1])
4162
+ df['column'] = mapped_values.apply(lambda x: x[2])
4163
+ df['field'] = mapped_values.apply(lambda x: x[3])
4164
+ df['object'] = df['path'].str.split('_').str[3].str.split('.').str[0]
4165
+ df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['column'].astype(str)
4166
+ df['cv_predictions'] = (df['pred'] >= threshold).astype(int)
4167
+
4168
+ return df
4169
+
4170
+ def get_ml_results_paths(src, model_type='xgboost', channel_of_interest=1):
4171
+
4172
+ if isinstance(channel_of_interest, list):
4173
+ feature_string = "channels_" + "_".join(map(str, channel_of_interest))
4174
+
4175
+ elif isinstance(channel_of_interest, int):
4176
+ feature_string = f"channel_{channel_of_interest}"
4177
+
4178
+ elif channel_of_interest is 'morphology':
4179
+ feature_string = 'morphology'
4180
+
4181
+ elif channel_of_interest is None:
4182
+ feature_string = 'all_features'
4183
+ else:
4184
+ raise ValueError(f"Unsupported channel_of_interest: {channel_of_interest}. Supported values are 'int', 'list', 'None', or 'morphology'.")
4185
+
4186
+ res_fldr = os.path.join(src, 'results', model_type, feature_string)
4187
+ print(f'Saving results to {res_fldr}')
4188
+ os.makedirs(res_fldr, exist_ok=True)
4189
+ data_path = os.path.join(res_fldr, 'results.csv')
4190
+ permutation_path = os.path.join(res_fldr, 'permutation.csv')
4191
+ feature_importance_path = os.path.join(res_fldr, 'feature_importance.csv')
4192
+ model_metricks_path = os.path.join(res_fldr, f'{model_type}_model.csv')
4193
+ permutation_fig_path = os.path.join(res_fldr, 'permutation.pdf')
4194
+ feature_importance_fig_path = os.path.join(res_fldr, 'feature_importance.pdf')
4195
+ shap_fig_path = os.path.join(res_fldr, 'shap.pdf')
4196
+ plate_heatmap_path = os.path.join(res_fldr, 'plate_heatmap.pdf')
4197
+ settings_csv = os.path.join(res_fldr, 'ml_settings.csv')
4198
+ return data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv
4199
+
4200
+ def augment_image(image):
4201
+ """
4202
+ Perform data augmentation by rotating and reflecting the image.
4203
+
4204
+ Parameters:
4205
+ - image (PIL Image or numpy array): The input image.
4206
+
4207
+ Returns:
4208
+ - augmented_images (list): A list of augmented images.
4209
+ """
4210
+ augmented_images = []
4211
+
4212
+ # Convert PIL image to numpy array if necessary
4213
+ if isinstance(image, Image.Image):
4214
+ image = np.array(image)
4215
+
4216
+ # Handle grayscale images
4217
+ if len(image.shape) == 2:
4218
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
4219
+
4220
+ # Rotations and reflections
4221
+ transformations = [
4222
+ None, # Original
4223
+ cv2.ROTATE_90_CLOCKWISE,
4224
+ cv2.ROTATE_180,
4225
+ cv2.ROTATE_90_COUNTERCLOCKWISE
4226
+ ]
4227
+
4228
+ for transform in transformations:
4229
+ if transform is not None:
4230
+ rotated = cv2.rotate(image, transform)
4231
+ else:
4232
+ rotated = image
4233
+ augmented_images.append(rotated)
4234
+
4235
+ # Reflections
4236
+ flipped = cv2.flip(rotated, 1)
4237
+ augmented_images.append(flipped)
4238
+
4239
+ # Convert numpy arrays back to PIL images
4240
+ augmented_images = [Image.fromarray(img) for img in augmented_images]
4241
+
4242
+ return augmented_images
4243
+
4244
+ def augment_dataset(dataset, is_grayscale=False):
4245
+ """
4246
+ Perform data augmentation on the entire dataset by rotating and reflecting the images.
4247
+
4248
+ Parameters:
4249
+ - dataset (list of tuples): The input dataset, each entry is a tuple (image, label, filename).
4250
+ - is_grayscale (bool): Flag indicating if the images are grayscale.
4251
+
4252
+ Returns:
4253
+ - augmented_dataset (list of tuples): A dataset with augmented (image, label, filename) tuples.
4254
+ """
4255
+ augmented_dataset = []
4256
+
4257
+ for img, label, filename in dataset:
4258
+ augmented_images = []
4259
+
4260
+ # Ensure the image is a tensor
4261
+ if not isinstance(img, torch.Tensor):
4262
+ raise TypeError(f"Expected torch.Tensor, got {type(img)}")
4263
+
4264
+ # Rotations and reflections
4265
+ angles = [0, 90, 180, 270]
4266
+
4267
+ for angle in angles:
4268
+ rotated = torchvision.transforms.functional.rotate(img, angle)
4269
+ augmented_images.append(rotated)
4270
+
4271
+ # Reflections
4272
+ flipped = torchvision.transforms.functional.hflip(rotated)
4273
+ augmented_images.append(flipped)
4274
+
4275
+ # Add augmented images to the dataset
4276
+ for aug_img in augmented_images:
4277
+ augmented_dataset.append((aug_img, label, filename))
4278
+
4279
+ return augmented_dataset
4280
+
4281
+
4282
+ def convert_and_relabel_masks(folder_path):
4283
+ """
4284
+ Converts all int64 npy masks in a folder to uint16 with relabeling to ensure all labels are retained.
4285
+
4286
+ Parameters:
4287
+ - folder_path (str): The path to the folder containing int64 npy mask files.
4288
+
4289
+ Returns:
4290
+ - None
4291
+ """
4292
+ files = [f for f in os.listdir(folder_path) if f.endswith('.npy')]
4293
+
4294
+ for file in files:
4295
+ file_path = os.path.join(folder_path, file)
4296
+ # Load the mask
4297
+ mask = np.load(file_path)
4298
+ #print(mask.shape)
4299
+ #print(mask.dtype)
4300
+ # Check the current dtype
4301
+ if mask.dtype != np.int64:
4302
+ print(f"Skipping {file} as it is not int64.")
4303
+ continue
4304
+
4305
+ # Relabel the mask to ensure unique labels within uint16 range
4306
+ unique_labels = np.unique(mask)
4307
+ if unique_labels.max() > 65535:
4308
+ print(f"Warning: The mask in {file} contains values that exceed the uint16 range and will be relabeled.")
4309
+
4310
+ relabeled_mask = measure.label(mask, background=0)
4311
+
4312
+ # Check that relabeling worked correctly
4313
+ unique_relabeled = np.unique(relabeled_mask)
4314
+ if unique_relabeled.max() > 65535:
4315
+ print(f"Error: Relabeling failed for {file} as it still contains values that exceed the uint16 range.")
4316
+ continue
4317
+
4318
+ # Convert to uint16
4319
+ relabeled_mask = relabeled_mask.astype(np.uint16)
4320
+
4321
+ # Save the converted mask
4322
+ np.save(file_path, relabeled_mask)
4323
+
4324
+ print(f"Converted {file} and saved as uint16_{file}")
4325
+
4326
+ def correct_masks(src):
4327
+
4328
+ from .utils import _load_and_concatenate_arrays
4329
+
4330
+ cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
4331
+ convert_and_relabel_masks(cell_path)
4332
+ _load_and_concatenate_arrays(src, [0,1,2,3], 1, 0, 2)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.0.81
3
+ Version: 0.1.0
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -0,0 +1,40 @@
1
+ spacr/__init__.py,sha256=rnb_oYH6HmC1KvJmc7ymrdtHvmMW5t7bn8tJa03cxcA,1286
2
+ spacr/__main__.py,sha256=bkAJJD2kjIqOP-u1kLvct9jQQCeUXzlEjdgitwi1Lm8,75
3
+ spacr/alpha.py,sha256=Y95sLEfpK2OSYKRn3M8eUOU33JJeXfV8zhrC4KnwSTY,35244
4
+ spacr/annotate_app.py,sha256=2X_xnXFN_w19RG99awsTPLzQfQZyQdwbaT-lcRxyV-w,20670
5
+ spacr/annotate_app_v2.py,sha256=kvikj_QbN4EHdyYwB0kjEepEuq2uVwfAF-VJ531qO3Q,22647
6
+ spacr/chris.py,sha256=YlBjSgeZaY8HPy6jkrT_ISAnCMAKVfvCxF0I9eAZLFM,2418
7
+ spacr/cli.py,sha256=507jfOOEV8BoL4eeUcblvH-iiDHdBrEVJLu1ghAAPSc,1800
8
+ spacr/core.py,sha256=m9fsk-qDPow4AzOYpTIsd4jT7PF_L_4y5xillR5eRdk,160253
9
+ spacr/deep_spacr.py,sha256=N0o7ILD2p1FTfU4DFxnpjs00xjLhwib-ev0XGqA6muU,37035
10
+ spacr/foldseek.py,sha256=YIP1d4Ci6CeA9jSyiv-HTDbNmAmcSM9Y_DaOs7wYzLY,33546
11
+ spacr/get_alfafold_structures.py,sha256=ehx_MQgb12k3hFecP6cYVlm5TLO8iWjgevy8ESyS3cw,3544
12
+ spacr/graph_learning.py,sha256=1tR-ZxvXE3dBz1Saw7BeVFcrsUFu9OlUZeZVifih9eo,13070
13
+ spacr/gui.py,sha256=zu-i8ezLJ03jNRACK7CRgNhkM8g8-pJFwZ-OSDFzsPg,6498
14
+ spacr/gui_2.py,sha256=FPlmvGm1VIood_YBnG44IafgjjaVfagybTnjVEOs5Ig,3299
15
+ spacr/gui_classify_app.py,sha256=LY33wott1mR7AFYwBI9ZQZYY16lBB-wuaY4pL_poaQ0,7884
16
+ spacr/gui_mask_app.py,sha256=WKkAH0jv-SnfaZdJ8MkC7mkUIVSSrNE8lUfH3QBvUak,9747
17
+ spacr/gui_measure_app.py,sha256=5vjjds5NFaOcE8XeuWDug9k-NI4jbTrwp54sJ7DNaNI,9625
18
+ spacr/gui_sim_app.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ spacr/gui_utils.py,sha256=JRWwmGEEVSPgs0UtZRukdNwIUJepbP675_Fvs5qocPk,49718
20
+ spacr/io.py,sha256=IoERqSwoxJrInYl-E0WfwFOEDZXFdJofk5DmpbyLGWM,112077
21
+ spacr/logger.py,sha256=7Zqr3TuuOQLWT32gYr2q1qvv7x0a2JhLANmZcnBXAW8,670
22
+ spacr/mask_app.py,sha256=jlKmj_evveIkkyH3PYEcAshcLXN0DOPWB1oc4hAwq9E,44201
23
+ spacr/measure.py,sha256=0FRsHF5ftar4JZ0B_6Nq-NlyP5t6aiO0IrskyikIBEE,55000
24
+ spacr/old_code.py,sha256=jw67DAGoLBd7mWofVzRJSEmCI1Qrff26zIo65SEkV00,13817
25
+ spacr/plot.py,sha256=lrwU51OTWfby1wx73XGyjYmTjLVia7WOmGH5LZZ-4jM,67145
26
+ spacr/sequencing.py,sha256=U_TBJGNfOBfokGegUe950W_KPfm51VOgpfibXoZ8RMQ,83974
27
+ spacr/settings.py,sha256=Tr2fo2I75FGfmEVQOONOpGwqXMzFCrYMz4NAxav3ckg,21183
28
+ spacr/sim.py,sha256=FveaVgBi3eypO2oVB5Dx-v0CC1Ny7UPfXkJiiRRodAk,71212
29
+ spacr/timelapse.py,sha256=KMYCgHzf9LTZe-lWl5mvH2EjbKRE6OhpwdY13wEumGc,39504
30
+ spacr/utils.py,sha256=O7dpCF3bU95d2v0UuPFeJtzXYrkh0r-6aLxaqkKkFwY,184619
31
+ spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
32
+ spacr/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model,sha256=z8BbHWZPRnE9D_BHO0fBREE85c1vkltDs-incs2ytXQ,26566572
33
+ spacr/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv,sha256=fBAGuL_B8ERVdVizO3BHozTDSbZUh1yFzsYK3wkQN68,420
34
+ spacr/models/cp/toxo_pv_lumen.CP_model,sha256=2y_CindYhmTvVwBH39SNILF3rI3x9SsRn6qrMxHy3l0,26562451
35
+ spacr-0.1.0.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
36
+ spacr-0.1.0.dist-info/METADATA,sha256=AkkTVCUxEtm5QUQS4c58ZC0eaCx3ctpiFXchwjyRV9o,5157
37
+ spacr-0.1.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
38
+ spacr-0.1.0.dist-info/entry_points.txt,sha256=xncHsqD9MI5wj0_p4mgZlrB8dHm_g_qF0Ggo1c78LqY,315
39
+ spacr-0.1.0.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
40
+ spacr-0.1.0.dist-info/RECORD,,