spacr 0.0.82__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +4 -0
- spacr/annotate_app.py +4 -0
- spacr/annotate_app_v2.py +511 -0
- spacr/core.py +254 -172
- spacr/deep_spacr.py +137 -50
- spacr/graph_learning.py +28 -8
- spacr/gui.py +5 -5
- spacr/gui_2.py +106 -36
- spacr/gui_classify_app.py +3 -3
- spacr/gui_mask_app.py +34 -11
- spacr/gui_measure_app.py +32 -17
- spacr/gui_utils.py +96 -29
- spacr/io.py +227 -144
- spacr/measure.py +2 -1
- spacr/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model +0 -0
- spacr/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv +23 -0
- spacr/plot.py +102 -6
- spacr/sequencing.py +140 -91
- spacr/settings.py +477 -0
- spacr/timelapse.py +0 -3
- spacr/utils.py +312 -275
- {spacr-0.0.82.dist-info → spacr-0.1.1.dist-info}/METADATA +1 -1
- spacr-0.1.1.dist-info/RECORD +40 -0
- spacr-0.0.82.dist-info/RECORD +0 -36
- {spacr-0.0.82.dist-info → spacr-0.1.1.dist-info}/LICENSE +0 -0
- {spacr-0.0.82.dist-info → spacr-0.1.1.dist-info}/WHEEL +0 -0
- {spacr-0.0.82.dist-info → spacr-0.1.1.dist-info}/entry_points.txt +0 -0
- {spacr-0.0.82.dist-info → spacr-0.1.1.dist-info}/top_level.txt +0 -0
spacr/utils.py
CHANGED
@@ -90,130 +90,6 @@ def check_mask_folder(src,mask_fldr):
|
|
90
90
|
else:
|
91
91
|
return True
|
92
92
|
|
93
|
-
def set_default_plot_merge_settings():
|
94
|
-
settings = {}
|
95
|
-
settings.setdefault('include_noninfected', True)
|
96
|
-
settings.setdefault('include_multiinfected', True)
|
97
|
-
settings.setdefault('include_multinucleated', True)
|
98
|
-
settings.setdefault('remove_background', False)
|
99
|
-
settings.setdefault('filter_min_max', None)
|
100
|
-
settings.setdefault('channel_dims', [0,1,2,3])
|
101
|
-
settings.setdefault('backgrounds', [100,100,100,100])
|
102
|
-
settings.setdefault('cell_mask_dim', 4)
|
103
|
-
settings.setdefault('nucleus_mask_dim', 5)
|
104
|
-
settings.setdefault('pathogen_mask_dim', 6)
|
105
|
-
settings.setdefault('outline_thickness', 3)
|
106
|
-
settings.setdefault('outline_color', 'gbr')
|
107
|
-
settings.setdefault('overlay_chans', [1,2,3])
|
108
|
-
settings.setdefault('overlay', True)
|
109
|
-
settings.setdefault('normalization_percentiles', [2,98])
|
110
|
-
settings.setdefault('normalize', True)
|
111
|
-
settings.setdefault('print_object_number', True)
|
112
|
-
settings.setdefault('nr', 1)
|
113
|
-
settings.setdefault('figuresize', 50)
|
114
|
-
settings.setdefault('cmap', 'inferno')
|
115
|
-
settings.setdefault('verbose', True)
|
116
|
-
|
117
|
-
return settings
|
118
|
-
|
119
|
-
def set_default_settings_preprocess_generate_masks(src, settings={}):
|
120
|
-
# Main settings
|
121
|
-
settings['src'] = src
|
122
|
-
settings.setdefault('preprocess', True)
|
123
|
-
settings.setdefault('masks', True)
|
124
|
-
settings.setdefault('save', True)
|
125
|
-
settings.setdefault('batch_size', 50)
|
126
|
-
settings.setdefault('test_mode', False)
|
127
|
-
settings.setdefault('test_images', 10)
|
128
|
-
settings.setdefault('magnification', 20)
|
129
|
-
settings.setdefault('custom_regex', None)
|
130
|
-
settings.setdefault('metadata_type', 'cellvoyager')
|
131
|
-
settings.setdefault('workers', os.cpu_count()-4)
|
132
|
-
settings.setdefault('randomize', True)
|
133
|
-
settings.setdefault('verbose', True)
|
134
|
-
|
135
|
-
settings.setdefault('remove_background_cell', False)
|
136
|
-
settings.setdefault('remove_background_nucleus', False)
|
137
|
-
settings.setdefault('remove_background_pathogen', False)
|
138
|
-
|
139
|
-
# Channel settings
|
140
|
-
settings.setdefault('cell_channel', None)
|
141
|
-
settings.setdefault('nucleus_channel', None)
|
142
|
-
settings.setdefault('pathogen_channel', None)
|
143
|
-
settings.setdefault('channels', [0,1,2,3])
|
144
|
-
settings.setdefault('pathogen_background', 100)
|
145
|
-
settings.setdefault('pathogen_Signal_to_noise', 10)
|
146
|
-
settings.setdefault('pathogen_CP_prob', 0)
|
147
|
-
settings.setdefault('cell_background', 100)
|
148
|
-
settings.setdefault('cell_Signal_to_noise', 10)
|
149
|
-
settings.setdefault('cell_CP_prob', 0)
|
150
|
-
settings.setdefault('nucleus_background', 100)
|
151
|
-
settings.setdefault('nucleus_Signal_to_noise', 10)
|
152
|
-
settings.setdefault('nucleus_CP_prob', 0)
|
153
|
-
|
154
|
-
settings.setdefault('nucleus_FT', 100)
|
155
|
-
settings.setdefault('cell_FT', 100)
|
156
|
-
settings.setdefault('pathogen_FT', 100)
|
157
|
-
|
158
|
-
# Plot settings
|
159
|
-
settings.setdefault('plot', False)
|
160
|
-
settings.setdefault('figuresize', 50)
|
161
|
-
settings.setdefault('cmap', 'inferno')
|
162
|
-
settings.setdefault('normalize', True)
|
163
|
-
settings.setdefault('normalize_plots', True)
|
164
|
-
settings.setdefault('examples_to_plot', 1)
|
165
|
-
|
166
|
-
# Analasys settings
|
167
|
-
settings.setdefault('pathogen_model', None)
|
168
|
-
settings.setdefault('merge_pathogens', False)
|
169
|
-
settings.setdefault('filter', False)
|
170
|
-
settings.setdefault('lower_percentile', 2)
|
171
|
-
|
172
|
-
# Timelapse settings
|
173
|
-
settings.setdefault('timelapse', False)
|
174
|
-
settings.setdefault('fps', 2)
|
175
|
-
settings.setdefault('timelapse_displacement', None)
|
176
|
-
settings.setdefault('timelapse_memory', 3)
|
177
|
-
settings.setdefault('timelapse_frame_limits', None)
|
178
|
-
settings.setdefault('timelapse_remove_transient', False)
|
179
|
-
settings.setdefault('timelapse_mode', 'trackpy')
|
180
|
-
settings.setdefault('timelapse_objects', 'cells')
|
181
|
-
|
182
|
-
# Misc settings
|
183
|
-
settings.setdefault('all_to_mip', False)
|
184
|
-
settings.setdefault('pick_slice', False)
|
185
|
-
settings.setdefault('skip_mode', '01')
|
186
|
-
settings.setdefault('upscale', False)
|
187
|
-
settings.setdefault('upscale_factor', 2.0)
|
188
|
-
settings.setdefault('adjust_cells', False)
|
189
|
-
|
190
|
-
return settings
|
191
|
-
|
192
|
-
def set_default_settings_preprocess_img_data(settings):
|
193
|
-
|
194
|
-
metadata_type = settings.setdefault('metadata_type', 'cellvoyager')
|
195
|
-
custom_regex = settings.setdefault('custom_regex', None)
|
196
|
-
nr = settings.setdefault('nr', 1)
|
197
|
-
plot = settings.setdefault('plot', True)
|
198
|
-
batch_size = settings.setdefault('batch_size', 50)
|
199
|
-
timelapse = settings.setdefault('timelapse', False)
|
200
|
-
lower_percentile = settings.setdefault('lower_percentile', 2)
|
201
|
-
randomize = settings.setdefault('randomize', True)
|
202
|
-
all_to_mip = settings.setdefault('all_to_mip', False)
|
203
|
-
pick_slice = settings.setdefault('pick_slice', False)
|
204
|
-
skip_mode = settings.setdefault('skip_mode', False)
|
205
|
-
|
206
|
-
cmap = settings.setdefault('cmap', 'inferno')
|
207
|
-
figuresize = settings.setdefault('figuresize', 50)
|
208
|
-
normalize = settings.setdefault('normalize', True)
|
209
|
-
save_dtype = settings.setdefault('save_dtype', 'uint16')
|
210
|
-
|
211
|
-
test_mode = settings.setdefault('test_mode', False)
|
212
|
-
test_images = settings.setdefault('test_images', 10)
|
213
|
-
random_test = settings.setdefault('random_test', True)
|
214
|
-
|
215
|
-
return settings, metadata_type, custom_regex, nr, plot, batch_size, timelapse, lower_percentile, randomize, all_to_mip, pick_slice, skip_mode, cmap, figuresize, normalize, save_dtype, test_mode, test_images, random_test
|
216
|
-
|
217
93
|
def smooth_hull_lines(cluster_data):
|
218
94
|
hull = ConvexHull(cluster_data)
|
219
95
|
|
@@ -3631,122 +3507,6 @@ def correct_paths(df, base_path):
|
|
3631
3507
|
image_paths = df['png_path'].to_list()
|
3632
3508
|
return df, image_paths
|
3633
3509
|
|
3634
|
-
def get_umap_image_settings(settings={}):
|
3635
|
-
settings.setdefault('src', 'path')
|
3636
|
-
settings.setdefault('row_limit', 1000)
|
3637
|
-
settings.setdefault('tables', ['cell', 'cytoplasm', 'nucleus', 'pathogen'])
|
3638
|
-
settings.setdefault('visualize', 'cell')
|
3639
|
-
settings.setdefault('image_nr', 16)
|
3640
|
-
settings.setdefault('dot_size', 50)
|
3641
|
-
settings.setdefault('n_neighbors', 1000)
|
3642
|
-
settings.setdefault('min_dist', 0.1)
|
3643
|
-
settings.setdefault('metric', 'euclidean')
|
3644
|
-
settings.setdefault('eps', 0.5)
|
3645
|
-
settings.setdefault('min_samples', 1000)
|
3646
|
-
settings.setdefault('filter_by', 'channel_0')
|
3647
|
-
settings.setdefault('img_zoom', 0.5)
|
3648
|
-
settings.setdefault('plot_by_cluster', True)
|
3649
|
-
settings.setdefault('plot_cluster_grids', True)
|
3650
|
-
settings.setdefault('remove_cluster_noise', True)
|
3651
|
-
settings.setdefault('remove_highly_correlated', True)
|
3652
|
-
settings.setdefault('log_data', False)
|
3653
|
-
settings.setdefault('figuresize', 60)
|
3654
|
-
settings.setdefault('black_background', True)
|
3655
|
-
settings.setdefault('remove_image_canvas', False)
|
3656
|
-
settings.setdefault('plot_outlines', True)
|
3657
|
-
settings.setdefault('plot_points', True)
|
3658
|
-
settings.setdefault('smooth_lines', True)
|
3659
|
-
settings.setdefault('clustering', 'dbscan')
|
3660
|
-
settings.setdefault('exclude', None)
|
3661
|
-
settings.setdefault('col_to_compare', 'col')
|
3662
|
-
settings.setdefault('pos', 'c1')
|
3663
|
-
settings.setdefault('neg', 'c2')
|
3664
|
-
settings.setdefault('embedding_by_controls', False)
|
3665
|
-
settings.setdefault('plot_images', True)
|
3666
|
-
settings.setdefault('reduction_method','umap')
|
3667
|
-
settings.setdefault('save_figure', False)
|
3668
|
-
settings.setdefault('n_jobs', -1)
|
3669
|
-
settings.setdefault('color_by', None)
|
3670
|
-
settings.setdefault('neg', 'c1')
|
3671
|
-
settings.setdefault('pos', 'c2')
|
3672
|
-
settings.setdefault('mix', 'c3')
|
3673
|
-
settings.setdefault('mix', 'c3')
|
3674
|
-
settings.setdefault('exclude_conditions', None)
|
3675
|
-
settings.setdefault('analyze_clusters', False)
|
3676
|
-
settings.setdefault('resnet_features', False)
|
3677
|
-
settings.setdefault('verbose',True)
|
3678
|
-
return settings
|
3679
|
-
|
3680
|
-
def get_measure_crop_settings(settings):
|
3681
|
-
|
3682
|
-
# Test mode
|
3683
|
-
settings.setdefault('test_mode', False)
|
3684
|
-
settings.setdefault('test_nr', 10)
|
3685
|
-
|
3686
|
-
#measurement settings
|
3687
|
-
settings.setdefault('save_measurements',True)
|
3688
|
-
settings.setdefault('radial_dist', True)
|
3689
|
-
settings.setdefault('calculate_correlation', True)
|
3690
|
-
settings.setdefault('manders_thresholds', [15,85,95])
|
3691
|
-
settings.setdefault('homogeneity', True)
|
3692
|
-
settings.setdefault('homogeneity_distances', [8,16,32])
|
3693
|
-
|
3694
|
-
# Cropping settings
|
3695
|
-
settings.setdefault('save_arrays', False)
|
3696
|
-
settings.setdefault('save_png',True)
|
3697
|
-
settings.setdefault('use_bounding_box',False)
|
3698
|
-
settings.setdefault('png_size',[224,224])
|
3699
|
-
settings.setdefault('png_dims',[0,1,2])
|
3700
|
-
settings.setdefault('normalize',False)
|
3701
|
-
settings.setdefault('normalize_by','png')
|
3702
|
-
settings.setdefault('crop_mode',['cell'])
|
3703
|
-
settings.setdefault('dialate_pngs', False)
|
3704
|
-
settings.setdefault('dialate_png_ratios', [0.2])
|
3705
|
-
|
3706
|
-
# Timelapsed settings
|
3707
|
-
settings.setdefault('timelapse', False)
|
3708
|
-
settings.setdefault('timelapse_objects', 'cell')
|
3709
|
-
|
3710
|
-
# Operational settings
|
3711
|
-
settings.setdefault('plot',False)
|
3712
|
-
settings.setdefault('plot_filtration',False)
|
3713
|
-
settings.setdefault('representative_images', False)
|
3714
|
-
settings.setdefault('max_workers', os.cpu_count()-2)
|
3715
|
-
|
3716
|
-
# Object settings
|
3717
|
-
settings.setdefault('cell_mask_dim',None)
|
3718
|
-
settings.setdefault('nucleus_mask_dim',None)
|
3719
|
-
settings.setdefault('pathogen_mask_dim',None)
|
3720
|
-
settings.setdefault('cytoplasm',False)
|
3721
|
-
settings.setdefault('include_uninfected',True)
|
3722
|
-
settings.setdefault('cell_min_size',0)
|
3723
|
-
settings.setdefault('nucleus_min_size',0)
|
3724
|
-
settings.setdefault('pathogen_min_size',0)
|
3725
|
-
settings.setdefault('cytoplasm_min_size',0)
|
3726
|
-
settings.setdefault('merge_edge_pathogen_cells', True)
|
3727
|
-
|
3728
|
-
# Miscellaneous settings
|
3729
|
-
settings.setdefault('experiment', 'exp')
|
3730
|
-
settings.setdefault('cells', 'HeLa')
|
3731
|
-
settings.setdefault('cell_loc', None)
|
3732
|
-
settings.setdefault('pathogens', ['ME49Dku80WT', 'ME49Dku80dgra8:GRA8', 'ME49Dku80dgra8', 'ME49Dku80TKO'])
|
3733
|
-
settings.setdefault('pathogen_loc', [['c1', 'c2', 'c3', 'c4', 'c5', 'c6'], ['c7', 'c8', 'c9', 'c10', 'c11', 'c12'], ['c13', 'c14', 'c15', 'c16', 'c17', 'c18'], ['c19', 'c20', 'c21', 'c22', 'c23', 'c24']])
|
3734
|
-
settings.setdefault('treatments', ['BR1', 'BR2', 'BR3'])
|
3735
|
-
settings.setdefault('treatment_loc', [['c1', 'c2', 'c7', 'c8', 'c13', 'c14', 'c19', 'c20'], ['c3', 'c4', 'c9', 'c10', 'c15', 'c16', 'c21', 'c22'], ['c5', 'c6', 'c11', 'c12', 'c17', 'c18', 'c23', 'c24']])
|
3736
|
-
settings.setdefault('channel_of_interest', 2)
|
3737
|
-
settings.setdefault('compartments', ['pathogen', 'cytoplasm'])
|
3738
|
-
settings.setdefault('measurement', 'mean_intensity')
|
3739
|
-
settings.setdefault('nr_imgs', 32)
|
3740
|
-
settings.setdefault('um_per_pixel', 0.1)
|
3741
|
-
|
3742
|
-
if settings['test_mode']:
|
3743
|
-
settings['plot'] = True
|
3744
|
-
settings['plot_filtration'] = True
|
3745
|
-
test_imgs = settings['test_nr']
|
3746
|
-
print(f'Test mode enabled with {test_imgs} images, plotting set to True')
|
3747
|
-
|
3748
|
-
return settings
|
3749
|
-
|
3750
3510
|
def delete_folder(folder_path):
|
3751
3511
|
if os.path.exists(folder_path) and os.path.isdir(folder_path):
|
3752
3512
|
for root, dirs, files in os.walk(folder_path, topdown=False):
|
@@ -3833,7 +3593,57 @@ def preprocess_data(df, filter_by, remove_highly_correlated, log_data, exclude):
|
|
3833
3593
|
|
3834
3594
|
return numeric_data
|
3835
3595
|
|
3836
|
-
def
|
3596
|
+
def remove_low_variance_columns(df, threshold=0.01, verbose=False):
|
3597
|
+
"""
|
3598
|
+
Removes columns from the dataframe that have low variance.
|
3599
|
+
|
3600
|
+
Parameters:
|
3601
|
+
df (pandas.DataFrame): The DataFrame containing the data.
|
3602
|
+
threshold (float): The variance threshold below which columns will be removed.
|
3603
|
+
|
3604
|
+
Returns:
|
3605
|
+
pandas.DataFrame: The DataFrame with low variance columns removed.
|
3606
|
+
"""
|
3607
|
+
|
3608
|
+
numerical_cols = df.select_dtypes(include=[np.number])
|
3609
|
+
low_variance_cols = numerical_cols.var()[numerical_cols.var() < threshold].index.tolist()
|
3610
|
+
|
3611
|
+
if verbose:
|
3612
|
+
print(f"Removed columns due to low variance: {low_variance_cols}")
|
3613
|
+
|
3614
|
+
df = df.drop(columns=low_variance_cols)
|
3615
|
+
|
3616
|
+
return df
|
3617
|
+
|
3618
|
+
def remove_highly_correlated_columns(df, threshold=0.95, verbose=False):
|
3619
|
+
"""
|
3620
|
+
Removes columns from the dataframe that are highly correlated with one another.
|
3621
|
+
|
3622
|
+
Parameters:
|
3623
|
+
df (pandas.DataFrame): The DataFrame containing the data.
|
3624
|
+
threshold (float): The correlation threshold above which columns will be removed.
|
3625
|
+
|
3626
|
+
Returns:
|
3627
|
+
pandas.DataFrame: The DataFrame with highly correlated columns removed.
|
3628
|
+
"""
|
3629
|
+
numerical_cols = df.select_dtypes(include=[np.number])
|
3630
|
+
corr_matrix = numerical_cols.corr().abs()
|
3631
|
+
|
3632
|
+
# Upper triangle of the correlation matrix
|
3633
|
+
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
|
3634
|
+
|
3635
|
+
# Find columns with correlation greater than the threshold
|
3636
|
+
to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
|
3637
|
+
|
3638
|
+
if verbose:
|
3639
|
+
print(f"Removed columns due to high correlation: {to_drop}")
|
3640
|
+
|
3641
|
+
df = df.drop(columns=to_drop)
|
3642
|
+
|
3643
|
+
return df
|
3644
|
+
|
3645
|
+
def filter_dataframe_features(df, channel_of_interest, exclude=None, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
|
3646
|
+
|
3837
3647
|
"""
|
3838
3648
|
Filter the dataframe `df` based on the specified `channel_of_interest` and `exclude` parameters.
|
3839
3649
|
|
@@ -3847,43 +3657,54 @@ def filter_dataframe_features(df, channel_of_interest, exclude=None):
|
|
3847
3657
|
- features (list): The list of selected features after filtering.
|
3848
3658
|
|
3849
3659
|
"""
|
3850
|
-
if channel_of_interest is None:
|
3851
|
-
feature_string = None
|
3852
|
-
elif channel_of_interest == 'morphology':
|
3853
|
-
feature_string = 'morphology'
|
3854
|
-
elif isinstance(channel_of_interest, list):
|
3855
|
-
feature_string = []
|
3856
|
-
for i in channel_of_interest:
|
3857
|
-
feature_string_tmp = f'channel_{i}'
|
3858
|
-
feature_string.append(feature_string_tmp)
|
3859
|
-
elif isinstance(channel_of_interest, int):
|
3860
|
-
feature_string = f'channel_{channel_of_interest}'
|
3861
|
-
elif isinstance(channel_of_interest, str):
|
3862
|
-
feature_string = channel_of_interest
|
3863
3660
|
|
3864
|
-
|
3865
|
-
|
3866
|
-
|
3867
|
-
|
3868
|
-
|
3869
|
-
|
3870
|
-
|
3871
|
-
|
3661
|
+
count_and_id_columns = [col for col in df.columns if '_id' in col or 'count' in col]
|
3662
|
+
if 'pathogen_pathogen' in df.columns:
|
3663
|
+
count_and_id_columns.append('pathogen_pathogen')
|
3664
|
+
if 'cell_cell' in df.columns:
|
3665
|
+
count_and_id_columns.append('cell_cell')
|
3666
|
+
if 'nucleus_nucleus' in df.columns:
|
3667
|
+
count_and_id_columns.append('nucleus_nucleus')
|
3668
|
+
if 'cytoplasm_cytoplasm' in df.columns:
|
3669
|
+
count_and_id_columns.append('cytoplasm_cytoplasm')
|
3670
|
+
|
3671
|
+
if verbose:
|
3672
|
+
print("Columns to remove:", count_and_id_columns)
|
3673
|
+
df = df.drop(columns=count_and_id_columns)
|
3674
|
+
|
3675
|
+
if not channel_of_interest is None:
|
3676
|
+
drop_columns = ['channel_1', 'channel_2', 'channel_3', 'channel_4']
|
3677
|
+
|
3678
|
+
if isinstance(channel_of_interest, list):
|
3679
|
+
feature_strings = [f"channel_{channel}" for channel in channel_of_interest]
|
3680
|
+
|
3681
|
+
elif isinstance(channel_of_interest, int):
|
3682
|
+
feature_string = f"channel_{channel_of_interest}"
|
3683
|
+
feature_strings = [feature_string]
|
3684
|
+
elif channel_of_interest == 'morphology':
|
3685
|
+
morphological_features = ['area', 'area_bbox', 'major_axis_length', 'minor_axis_length', 'eccentricity', 'extent', 'perimeter', 'euler_number', 'solidity', 'zernike_0', 'zernike_1', 'zernike_2', 'zernike_3', 'zernike_4', 'zernike_5', 'zernike_6', 'zernike_7', 'zernike_8', 'zernike_9', 'zernike_10', 'zernike_11', 'zernike_12', 'zernike_13', 'zernike_14', 'zernike_15', 'zernike_16', 'zernike_17', 'zernike_18', 'zernike_19', 'zernike_20', 'zernike_21', 'zernike_22', 'zernike_23', 'zernike_24', 'area_filled', 'convex_area', 'equivalent_diameter_area', 'feret_diameter_max']
|
3686
|
+
morphological_columns = [item for item in df.columns.tolist() if any(base in item for base in morphological_features)]
|
3687
|
+
columns_to_drop = [col for col in df.columns if col not in morphological_columns]
|
3688
|
+
|
3689
|
+
if channel_of_interest != 'morphology':
|
3690
|
+
# Remove entries from drop_columns that are also in feature_strings
|
3691
|
+
drop_columns = [col for col in drop_columns if col not in feature_strings]
|
3872
3692
|
|
3873
|
-
|
3874
|
-
|
3875
|
-
if feature_string in feature_list:
|
3876
|
-
feature_list.remove(feature_string)
|
3877
|
-
elif isinstance(feature_string, list):
|
3878
|
-
feature_list = [feature for feature in feature_list if feature not in feature_string]
|
3693
|
+
# Remove columns from the DataFrame that contain any entry from drop_columns in the column name
|
3694
|
+
columns_to_drop = [col for col in df.columns if any(drop_col in col for drop_col in drop_columns) or all(fs not in col for fs in feature_strings)]
|
3879
3695
|
|
3880
|
-
|
3881
|
-
|
3696
|
+
df = df.drop(columns=columns_to_drop)
|
3697
|
+
if verbose:
|
3698
|
+
print(f"Removed columns: {columns_to_drop}")
|
3699
|
+
|
3700
|
+
if remove_low_variance_features:
|
3701
|
+
df = remove_low_variance_columns(df, threshold=0.01, verbose=verbose)
|
3702
|
+
|
3703
|
+
if remove_highly_correlated_features:
|
3704
|
+
df = remove_highly_correlated_columns(df, threshold=0.95, verbose=verbose)
|
3882
3705
|
|
3883
|
-
|
3884
|
-
|
3885
|
-
features = [feature for feature in features if feature_ not in feature]
|
3886
|
-
print(f'After removing {feature_} features: {len(features)}')
|
3706
|
+
# Select numerical features
|
3707
|
+
features = df.select_dtypes(include=[np.number]).columns.tolist()
|
3887
3708
|
|
3888
3709
|
if isinstance(exclude, list):
|
3889
3710
|
features = [feature for feature in features if feature not in exclude]
|
@@ -4292,4 +4113,220 @@ def process_masks(mask_folder, image_folder, channel, batch_size=50, n_clusters=
|
|
4292
4113
|
largest_cluster_label = np.bincount(batch_labels).argmax()
|
4293
4114
|
cleaned_mask = remove_objects_not_in_largest_cluster(mask, batch_labels, largest_cluster_label)
|
4294
4115
|
np.save(mask_files[i], cleaned_mask)
|
4295
|
-
label_index += len(batch_properties)
|
4116
|
+
label_index += len(batch_properties)
|
4117
|
+
|
4118
|
+
def merge_regression_res_with_metadata(results_file, metadata_file, name='_metadata'):
|
4119
|
+
# Read the CSV files into dataframes
|
4120
|
+
df_results = pd.read_csv(results_file)
|
4121
|
+
df_metadata = pd.read_csv(metadata_file)
|
4122
|
+
|
4123
|
+
def extract_and_clean_gene(feature):
|
4124
|
+
# Extract the part between '[' and ']'
|
4125
|
+
match = re.search(r'\[(.*?)\]', feature)
|
4126
|
+
if match:
|
4127
|
+
gene = match.group(1)
|
4128
|
+
# Remove 'T.' if present
|
4129
|
+
gene = re.sub(r'^T\.', '', gene)
|
4130
|
+
# Remove everything after and including '_'
|
4131
|
+
gene = gene.split('_')[0]
|
4132
|
+
return gene
|
4133
|
+
return None
|
4134
|
+
|
4135
|
+
# Apply the function to the feature column
|
4136
|
+
df_results['gene'] = df_results['feature'].apply(extract_and_clean_gene)
|
4137
|
+
|
4138
|
+
df_metadata['gene'] = df_metadata['Gene ID'].apply(lambda x: x.split('_')[1] if '_' in x else None)
|
4139
|
+
|
4140
|
+
# Drop rows where gene extraction failed
|
4141
|
+
df_results = df_results.dropna(subset=['gene'])
|
4142
|
+
|
4143
|
+
# Merge the two dataframes on the gene column
|
4144
|
+
merged_df = pd.merge(df_results, df_metadata, on='gene')
|
4145
|
+
|
4146
|
+
# Generate the new file name
|
4147
|
+
base, ext = os.path.splitext(results_file)
|
4148
|
+
new_file = f"{base}{name}{ext}"
|
4149
|
+
|
4150
|
+
# Save the merged dataframe to the new file
|
4151
|
+
merged_df.to_csv(new_file, index=False)
|
4152
|
+
|
4153
|
+
return merged_df
|
4154
|
+
|
4155
|
+
def process_vision_results(df, threshold=0.5):
|
4156
|
+
|
4157
|
+
# Split the 'path' column using _map_wells function
|
4158
|
+
mapped_values = df['path'].apply(lambda x: _map_wells(x))
|
4159
|
+
|
4160
|
+
df['plate'] = mapped_values.apply(lambda x: x[0])
|
4161
|
+
df['row'] = mapped_values.apply(lambda x: x[1])
|
4162
|
+
df['column'] = mapped_values.apply(lambda x: x[2])
|
4163
|
+
df['field'] = mapped_values.apply(lambda x: x[3])
|
4164
|
+
df['object'] = df['path'].str.split('_').str[3].str.split('.').str[0]
|
4165
|
+
df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['column'].astype(str)
|
4166
|
+
df['cv_predictions'] = (df['pred'] >= threshold).astype(int)
|
4167
|
+
|
4168
|
+
return df
|
4169
|
+
|
4170
|
+
def get_ml_results_paths(src, model_type='xgboost', channel_of_interest=1):
|
4171
|
+
|
4172
|
+
if isinstance(channel_of_interest, list):
|
4173
|
+
feature_string = "channels_" + "_".join(map(str, channel_of_interest))
|
4174
|
+
|
4175
|
+
elif isinstance(channel_of_interest, int):
|
4176
|
+
feature_string = f"channel_{channel_of_interest}"
|
4177
|
+
|
4178
|
+
elif channel_of_interest is 'morphology':
|
4179
|
+
feature_string = 'morphology'
|
4180
|
+
|
4181
|
+
elif channel_of_interest is None:
|
4182
|
+
feature_string = 'all_features'
|
4183
|
+
else:
|
4184
|
+
raise ValueError(f"Unsupported channel_of_interest: {channel_of_interest}. Supported values are 'int', 'list', 'None', or 'morphology'.")
|
4185
|
+
|
4186
|
+
res_fldr = os.path.join(src, 'results', model_type, feature_string)
|
4187
|
+
print(f'Saving results to {res_fldr}')
|
4188
|
+
os.makedirs(res_fldr, exist_ok=True)
|
4189
|
+
data_path = os.path.join(res_fldr, 'results.csv')
|
4190
|
+
permutation_path = os.path.join(res_fldr, 'permutation.csv')
|
4191
|
+
feature_importance_path = os.path.join(res_fldr, 'feature_importance.csv')
|
4192
|
+
model_metricks_path = os.path.join(res_fldr, f'{model_type}_model.csv')
|
4193
|
+
permutation_fig_path = os.path.join(res_fldr, 'permutation.pdf')
|
4194
|
+
feature_importance_fig_path = os.path.join(res_fldr, 'feature_importance.pdf')
|
4195
|
+
shap_fig_path = os.path.join(res_fldr, 'shap.pdf')
|
4196
|
+
plate_heatmap_path = os.path.join(res_fldr, 'plate_heatmap.pdf')
|
4197
|
+
settings_csv = os.path.join(res_fldr, 'ml_settings.csv')
|
4198
|
+
return data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv
|
4199
|
+
|
4200
|
+
def augment_image(image):
|
4201
|
+
"""
|
4202
|
+
Perform data augmentation by rotating and reflecting the image.
|
4203
|
+
|
4204
|
+
Parameters:
|
4205
|
+
- image (PIL Image or numpy array): The input image.
|
4206
|
+
|
4207
|
+
Returns:
|
4208
|
+
- augmented_images (list): A list of augmented images.
|
4209
|
+
"""
|
4210
|
+
augmented_images = []
|
4211
|
+
|
4212
|
+
# Convert PIL image to numpy array if necessary
|
4213
|
+
if isinstance(image, Image.Image):
|
4214
|
+
image = np.array(image)
|
4215
|
+
|
4216
|
+
# Handle grayscale images
|
4217
|
+
if len(image.shape) == 2:
|
4218
|
+
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
|
4219
|
+
|
4220
|
+
# Rotations and reflections
|
4221
|
+
transformations = [
|
4222
|
+
None, # Original
|
4223
|
+
cv2.ROTATE_90_CLOCKWISE,
|
4224
|
+
cv2.ROTATE_180,
|
4225
|
+
cv2.ROTATE_90_COUNTERCLOCKWISE
|
4226
|
+
]
|
4227
|
+
|
4228
|
+
for transform in transformations:
|
4229
|
+
if transform is not None:
|
4230
|
+
rotated = cv2.rotate(image, transform)
|
4231
|
+
else:
|
4232
|
+
rotated = image
|
4233
|
+
augmented_images.append(rotated)
|
4234
|
+
|
4235
|
+
# Reflections
|
4236
|
+
flipped = cv2.flip(rotated, 1)
|
4237
|
+
augmented_images.append(flipped)
|
4238
|
+
|
4239
|
+
# Convert numpy arrays back to PIL images
|
4240
|
+
augmented_images = [Image.fromarray(img) for img in augmented_images]
|
4241
|
+
|
4242
|
+
return augmented_images
|
4243
|
+
|
4244
|
+
def augment_dataset(dataset, is_grayscale=False):
|
4245
|
+
"""
|
4246
|
+
Perform data augmentation on the entire dataset by rotating and reflecting the images.
|
4247
|
+
|
4248
|
+
Parameters:
|
4249
|
+
- dataset (list of tuples): The input dataset, each entry is a tuple (image, label, filename).
|
4250
|
+
- is_grayscale (bool): Flag indicating if the images are grayscale.
|
4251
|
+
|
4252
|
+
Returns:
|
4253
|
+
- augmented_dataset (list of tuples): A dataset with augmented (image, label, filename) tuples.
|
4254
|
+
"""
|
4255
|
+
augmented_dataset = []
|
4256
|
+
|
4257
|
+
for img, label, filename in dataset:
|
4258
|
+
augmented_images = []
|
4259
|
+
|
4260
|
+
# Ensure the image is a tensor
|
4261
|
+
if not isinstance(img, torch.Tensor):
|
4262
|
+
raise TypeError(f"Expected torch.Tensor, got {type(img)}")
|
4263
|
+
|
4264
|
+
# Rotations and reflections
|
4265
|
+
angles = [0, 90, 180, 270]
|
4266
|
+
|
4267
|
+
for angle in angles:
|
4268
|
+
rotated = torchvision.transforms.functional.rotate(img, angle)
|
4269
|
+
augmented_images.append(rotated)
|
4270
|
+
|
4271
|
+
# Reflections
|
4272
|
+
flipped = torchvision.transforms.functional.hflip(rotated)
|
4273
|
+
augmented_images.append(flipped)
|
4274
|
+
|
4275
|
+
# Add augmented images to the dataset
|
4276
|
+
for aug_img in augmented_images:
|
4277
|
+
augmented_dataset.append((aug_img, label, filename))
|
4278
|
+
|
4279
|
+
return augmented_dataset
|
4280
|
+
|
4281
|
+
|
4282
|
+
def convert_and_relabel_masks(folder_path):
|
4283
|
+
"""
|
4284
|
+
Converts all int64 npy masks in a folder to uint16 with relabeling to ensure all labels are retained.
|
4285
|
+
|
4286
|
+
Parameters:
|
4287
|
+
- folder_path (str): The path to the folder containing int64 npy mask files.
|
4288
|
+
|
4289
|
+
Returns:
|
4290
|
+
- None
|
4291
|
+
"""
|
4292
|
+
files = [f for f in os.listdir(folder_path) if f.endswith('.npy')]
|
4293
|
+
|
4294
|
+
for file in files:
|
4295
|
+
file_path = os.path.join(folder_path, file)
|
4296
|
+
# Load the mask
|
4297
|
+
mask = np.load(file_path)
|
4298
|
+
#print(mask.shape)
|
4299
|
+
#print(mask.dtype)
|
4300
|
+
# Check the current dtype
|
4301
|
+
if mask.dtype != np.int64:
|
4302
|
+
print(f"Skipping {file} as it is not int64.")
|
4303
|
+
continue
|
4304
|
+
|
4305
|
+
# Relabel the mask to ensure unique labels within uint16 range
|
4306
|
+
unique_labels = np.unique(mask)
|
4307
|
+
if unique_labels.max() > 65535:
|
4308
|
+
print(f"Warning: The mask in {file} contains values that exceed the uint16 range and will be relabeled.")
|
4309
|
+
|
4310
|
+
relabeled_mask = measure.label(mask, background=0)
|
4311
|
+
|
4312
|
+
# Check that relabeling worked correctly
|
4313
|
+
unique_relabeled = np.unique(relabeled_mask)
|
4314
|
+
if unique_relabeled.max() > 65535:
|
4315
|
+
print(f"Error: Relabeling failed for {file} as it still contains values that exceed the uint16 range.")
|
4316
|
+
continue
|
4317
|
+
|
4318
|
+
# Convert to uint16
|
4319
|
+
relabeled_mask = relabeled_mask.astype(np.uint16)
|
4320
|
+
|
4321
|
+
# Save the converted mask
|
4322
|
+
np.save(file_path, relabeled_mask)
|
4323
|
+
|
4324
|
+
print(f"Converted {file} and saved as uint16_{file}")
|
4325
|
+
|
4326
|
+
def correct_masks(src):
|
4327
|
+
|
4328
|
+
from .utils import _load_and_concatenate_arrays
|
4329
|
+
|
4330
|
+
cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
|
4331
|
+
convert_and_relabel_masks(cell_path)
|
4332
|
+
_load_and_concatenate_arrays(src, [0,1,2,3], 1, 0, 2)
|
@@ -0,0 +1,40 @@
|
|
1
|
+
spacr/__init__.py,sha256=rnb_oYH6HmC1KvJmc7ymrdtHvmMW5t7bn8tJa03cxcA,1286
|
2
|
+
spacr/__main__.py,sha256=bkAJJD2kjIqOP-u1kLvct9jQQCeUXzlEjdgitwi1Lm8,75
|
3
|
+
spacr/alpha.py,sha256=Y95sLEfpK2OSYKRn3M8eUOU33JJeXfV8zhrC4KnwSTY,35244
|
4
|
+
spacr/annotate_app.py,sha256=2X_xnXFN_w19RG99awsTPLzQfQZyQdwbaT-lcRxyV-w,20670
|
5
|
+
spacr/annotate_app_v2.py,sha256=kvikj_QbN4EHdyYwB0kjEepEuq2uVwfAF-VJ531qO3Q,22647
|
6
|
+
spacr/chris.py,sha256=YlBjSgeZaY8HPy6jkrT_ISAnCMAKVfvCxF0I9eAZLFM,2418
|
7
|
+
spacr/cli.py,sha256=507jfOOEV8BoL4eeUcblvH-iiDHdBrEVJLu1ghAAPSc,1800
|
8
|
+
spacr/core.py,sha256=m9fsk-qDPow4AzOYpTIsd4jT7PF_L_4y5xillR5eRdk,160253
|
9
|
+
spacr/deep_spacr.py,sha256=N0o7ILD2p1FTfU4DFxnpjs00xjLhwib-ev0XGqA6muU,37035
|
10
|
+
spacr/foldseek.py,sha256=YIP1d4Ci6CeA9jSyiv-HTDbNmAmcSM9Y_DaOs7wYzLY,33546
|
11
|
+
spacr/get_alfafold_structures.py,sha256=ehx_MQgb12k3hFecP6cYVlm5TLO8iWjgevy8ESyS3cw,3544
|
12
|
+
spacr/graph_learning.py,sha256=1tR-ZxvXE3dBz1Saw7BeVFcrsUFu9OlUZeZVifih9eo,13070
|
13
|
+
spacr/gui.py,sha256=ugBksLGOHdtOLlEuRyyc59TrkYKu3rDf8JxEgiBSVao,6536
|
14
|
+
spacr/gui_2.py,sha256=ZAI5quQYbhQJ40vK0NCqU_UMSPLkpfeQpomBWUSM0fc,6946
|
15
|
+
spacr/gui_classify_app.py,sha256=W_epjHsM3P9JfYDWFre694r9suXR_oEtBLvs6WAE_po,7860
|
16
|
+
spacr/gui_mask_app.py,sha256=Lmz1_PLUSuYYLWp36xnYSkKXqEn2bgaHIpW0uOeq4gQ,10403
|
17
|
+
spacr/gui_measure_app.py,sha256=kB-BL0_6vGo5MWND7e2OdLTz4MPa77K9tPYu3eDwBnk,10079
|
18
|
+
spacr/gui_sim_app.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
+
spacr/gui_utils.py,sha256=FFFpDzlNyolv1iQtawwD_acctvUpMsFsbVMCrdwyuCM,53167
|
20
|
+
spacr/io.py,sha256=IoERqSwoxJrInYl-E0WfwFOEDZXFdJofk5DmpbyLGWM,112077
|
21
|
+
spacr/logger.py,sha256=7Zqr3TuuOQLWT32gYr2q1qvv7x0a2JhLANmZcnBXAW8,670
|
22
|
+
spacr/mask_app.py,sha256=jlKmj_evveIkkyH3PYEcAshcLXN0DOPWB1oc4hAwq9E,44201
|
23
|
+
spacr/measure.py,sha256=0FRsHF5ftar4JZ0B_6Nq-NlyP5t6aiO0IrskyikIBEE,55000
|
24
|
+
spacr/old_code.py,sha256=jw67DAGoLBd7mWofVzRJSEmCI1Qrff26zIo65SEkV00,13817
|
25
|
+
spacr/plot.py,sha256=lrwU51OTWfby1wx73XGyjYmTjLVia7WOmGH5LZZ-4jM,67145
|
26
|
+
spacr/sequencing.py,sha256=U_TBJGNfOBfokGegUe950W_KPfm51VOgpfibXoZ8RMQ,83974
|
27
|
+
spacr/settings.py,sha256=Tr2fo2I75FGfmEVQOONOpGwqXMzFCrYMz4NAxav3ckg,21183
|
28
|
+
spacr/sim.py,sha256=FveaVgBi3eypO2oVB5Dx-v0CC1Ny7UPfXkJiiRRodAk,71212
|
29
|
+
spacr/timelapse.py,sha256=KMYCgHzf9LTZe-lWl5mvH2EjbKRE6OhpwdY13wEumGc,39504
|
30
|
+
spacr/utils.py,sha256=O7dpCF3bU95d2v0UuPFeJtzXYrkh0r-6aLxaqkKkFwY,184619
|
31
|
+
spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
|
32
|
+
spacr/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model,sha256=z8BbHWZPRnE9D_BHO0fBREE85c1vkltDs-incs2ytXQ,26566572
|
33
|
+
spacr/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv,sha256=fBAGuL_B8ERVdVizO3BHozTDSbZUh1yFzsYK3wkQN68,420
|
34
|
+
spacr/models/cp/toxo_pv_lumen.CP_model,sha256=2y_CindYhmTvVwBH39SNILF3rI3x9SsRn6qrMxHy3l0,26562451
|
35
|
+
spacr-0.1.1.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
|
36
|
+
spacr-0.1.1.dist-info/METADATA,sha256=f4CaWxwjyeC2yAEeYl-3J50QgVGZqTY9dBX9r66LyTM,5157
|
37
|
+
spacr-0.1.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
38
|
+
spacr-0.1.1.dist-info/entry_points.txt,sha256=xncHsqD9MI5wj0_p4mgZlrB8dHm_g_qF0Ggo1c78LqY,315
|
39
|
+
spacr-0.1.1.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
|
40
|
+
spacr-0.1.1.dist-info/RECORD,,
|