spacr 0.3.50__py3-none-any.whl → 0.3.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/gui_utils.py +11 -12
- spacr/measure.py +4 -1
- spacr/plot.py +152 -23
- spacr/settings.py +14 -30
- spacr/toxo.py +180 -1
- spacr/utils.py +24 -1
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/METADATA +2 -1
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/RECORD +12 -12
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/LICENSE +0 -0
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/WHEEL +0 -0
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/top_level.txt +0 -0
spacr/gui_utils.py
CHANGED
@@ -76,8 +76,8 @@ def load_app(root, app_name, app_func):
         root.current_app_exit_func()
     else:
         proceed_with_app(root, app_name, app_func)
-
-def
+
+def parse_list(value):
     """
     Parses a string representation of a list and returns the parsed list.

@@ -85,7 +85,7 @@ def parse_list_v1(value):
     value (str): The string representation of the list.

     Returns:
-    list: The parsed list.
+    list: The parsed list, which can contain integers, floats, or strings.

     Raises:
     ValueError: If the input value is not a valid list format or contains mixed types or unsupported types.
@@ -93,21 +93,20 @@ def parse_list_v1(value):
    try:
        parsed_value = ast.literal_eval(value)
        if isinstance(parsed_value, list):
-            # Check if
-            if all(isinstance(item, int) for item in parsed_value):
-                return parsed_value
-            elif all(isinstance(item, str) for item in parsed_value):
-                return parsed_value
-            elif all(isinstance(item, float) for item in parsed_value):
+            # Check if all elements are homogeneous (either all int, float, or str)
+            if all(isinstance(item, (int, float, str)) for item in parsed_value):
                return parsed_value
            else:
                raise ValueError("List contains mixed types or unsupported types")
+        elif isinstance(parsed_value, tuple):
+            # Convert tuple to list if it's a single-element tuple
+            return list(parsed_value) if len(parsed_value) > 1 else [parsed_value[0]]
        else:
            raise ValueError(f"Expected a list but got {type(parsed_value).__name__}")
    except (ValueError, SyntaxError) as e:
        raise ValueError(f"Invalid format for list: {value}. Error: {e}")
-
-def
+
+def parse_list_v1(value):
    """
    Parses a string representation of a list and returns the parsed list.

@@ -391,7 +390,7 @@ def convert_settings_dict_for_gui(settings):
        'nucleus_chann_dim': ('combo', chans, None),
        'pathogen_mask_dim': ('combo', chans, None),
        'pathogen_chann_dim': ('combo', chans, None),
-        'crop_mode': ('combo', ['cell', 'nucleus', 'pathogen', '
+        'crop_mode': ('combo', [['cell'], ['nucleus'], ['pathogen'], ['cell', 'nucleus'], ['cell', 'pathogen'], ['nucleus', 'pathogen'], ['cell', 'nucleus', 'pathogen']], ['cell']),
        'magnification': ('combo', [20, 40, 60], 20),
        'nucleus_channel': ('combo', chans_v2, None),
        'cell_channel': ('combo', chans_v2, None),
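As a quick illustration (not part of the diff), the reworked parse_list above accepts any list whose elements are each an int, float, or str, and now also unwraps tuples; a minimal sketch of the expected behavior, assuming the function is imported from spacr.gui_utils:

from spacr.gui_utils import parse_list  # the function shown in the hunk above

print(parse_list("[1, 2, 3]"))            # [1, 2, 3]
print(parse_list("['cell', 'nucleus']"))  # ['cell', 'nucleus']
print(parse_list("(0.2,)"))               # [0.2]  (single-element tuple becomes a one-item list)
parse_list("[1, None]")                   # raises ValueError: mixed or unsupported types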
spacr/measure.py
CHANGED
@@ -1028,7 +1028,10 @@ def measure_crop(settings):
        return

    if not isinstance(settings['crop_mode'], list):
-        print(f"WARNING: crop_mode should be a list with at least one element e.g. ['cell'] or ['cell','nucleus'] or [None]")
+        print(f"WARNING: crop_mode should be a list with at least one element e.g. ['cell'] or ['cell','nucleus'] or [None] got: {settings['crop_mode']}")
+        settings['crop_mode'] = [settings['crop_mode']]
+        settings['crop_mode'] = [str(crop_mode) for crop_mode in settings['crop_mode']]
+        print(f"Converted crop_mode to list: {settings['crop_mode']}")
        return

    _save_settings_to_db(settings)
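Sketched outside the diff, the net effect of the added lines is that a crop_mode supplied as a bare string is wrapped in a list and string-cast before measure_crop returns with the warning:

# standalone illustration of the coercion above (not a spacr API)
crop_mode = 'cell'                      # a bare string instead of ['cell']
if not isinstance(crop_mode, list):
    crop_mode = [crop_mode]             # -> ['cell']
    crop_mode = [str(m) for m in crop_mode]
print(crop_mode)                        # ['cell']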
spacr/plot.py
CHANGED
@@ -16,6 +16,7 @@ from skimage import measure
 from skimage.measure import find_contours, label, regionprops
 from skimage.segmentation import mark_boundaries
 from skimage.transform import resize as sk_resize
+import scikit_posthocs as sp

 import tifffile as tiff

@@ -2844,9 +2845,13 @@ class spacrGraph:
                                  len(self.df[self.df[self.grouping_column] == unique_groups[1]])})

        return test_results
-
+
    def perform_posthoc_tests(self, is_normal, unique_groups):
        """Perform post-hoc tests for multiple groups based on all_to_all flag."""
+
+        from .utils import choose_p_adjust_method
+
+        posthoc_results = []
        if is_normal and len(unique_groups) > 2 and self.all_to_all:
            tukey_result = pairwise_tukeyhsd(self.df[self.data_column], self.df[self.grouping_column], alpha=0.05)
            posthoc_results = []
@@ -2862,22 +2867,40 @@ class spacrGraph:
                                        'n_object': len(raw_data1) + len(raw_data2),
                                        'n_well': len(self.df[self.df[self.grouping_column] == comparison[0]]) + len(self.df[self.df[self.grouping_column] == comparison[1]])})
            return posthoc_results
-
-        elif len(unique_groups) > 2 and
-
-
-            for
-
-
-
-
-
-
-
-
-
+
+        elif len(unique_groups) > 2 and self.all_to_all:
+            print('performing_dunns')
+
+            # Prepare data for Dunn's test in long format
+            long_data = self.df[[self.data_column[0], self.grouping_column]].dropna()
+
+            p_adjust_method = choose_p_adjust_method(num_groups=len(long_data[self.grouping_column].unique()),num_data_points=len(long_data) // len(long_data[self.grouping_column].unique()))
+
+            # Perform Dunn's test with Bonferroni correction
+            dunn_result = sp.posthoc_dunn(
+                long_data,
+                val_col=self.data_column[0],
+                group_col=self.grouping_column,
+                p_adjust=p_adjust_method
+            )
+
+            for group_a, group_b in zip(*np.triu_indices_from(dunn_result, k=1)):
+                raw_data1 = self.raw_df[self.raw_df[self.grouping_column] == dunn_result.index[group_a]][self.data_column]
+                raw_data2 = self.raw_df[self.raw_df[self.grouping_column] == dunn_result.columns[group_b]][self.data_column]
+
+                posthoc_results.append({
+                    'Comparison': f"{dunn_result.index[group_a]} vs {dunn_result.columns[group_b]}",
+                    'Test Statistic': None,  # Dunn's test does not return a specific test statistic
+                    'p-value': dunn_result.iloc[group_a, group_b],  # Extract the p-value from the matrix
+                    'Test Name': "Dunn's Post-hoc",
+                    'p_adjust_method': p_adjust_method,
+                    'n_object': len(raw_data1) + len(raw_data2),  # Total objects
+                    'n_well': len(self.df[self.df[self.grouping_column] == dunn_result.index[group_a]]) +
+                              len(self.df[self.grouping_column] == dunn_result.columns[group_b])})
+
            return posthoc_results
-
+
+        return posthoc_results

    def create_plot(self, ax=None):
        """Create and display the plot based on the chosen graph type."""
@@ -2913,7 +2936,43 @@ class spacrGraph:
        transposed_table = list(map(list, zip(*table_data)))
        return row_labels, transposed_table

-
+
+    def _place_symbols(row_labels, transposed_table, x_positions, ax):
+        """
+        Places symbols and row labels aligned under the bars or jitter points on the graph.
+
+        Parameters:
+        - row_labels: List of row titles to be displayed along the y-axis.
+        - transposed_table: Data to be placed under each bar/jitter as symbols.
+        - x_positions: X-axis positions for each group to align the symbols.
+        - ax: The matplotlib Axes object where the plot is drawn.
+        """
+        # Get plot dimensions and adjust for different plot sizes
+        y_axis_min = ax.get_ylim()[0]  # Minimum y-axis value (usually 0)
+        symbol_start_y = y_axis_min - 0.05 * (ax.get_ylim()[1] - y_axis_min)  # Adjust a bit below the x-axis
+
+        # Calculate spacing for the table rows (adjust as needed)
+        y_spacing = 0.04  # Adjust this for better spacing between rows
+
+        # Determine the leftmost x-position for row labels (align with the y-axis)
+        label_x_pos = ax.get_xlim()[0] - 0.3  # Adjust offset from the y-axis
+
+        # Place row labels vertically aligned with symbols
+        for row_idx, title in enumerate(row_labels):
+            y_pos = symbol_start_y - (row_idx * y_spacing)  # Calculate vertical position for each label
+            ax.text(label_x_pos, y_pos, title, ha='right', va='center', fontsize=12, fontweight='regular')
+
+        # Place symbols under each bar or jitter point based on x-positions
+        for idx, (x_pos, column_data) in enumerate(zip(x_positions, transposed_table)):
+            for row_idx, text in enumerate(column_data):
+                y_pos = symbol_start_y - (row_idx * y_spacing)  # Adjust vertical spacing for symbols
+                ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12, fontweight='regular')
+
+        # Redraw to apply changes
+        ax.figure.canvas.draw()
+
+
+    def _place_symbols_v1(row_labels, transposed_table, x_positions, ax):

        # Get the bottom of the y-axis (y=0) in data coordinates and convert to display coordinates
        y_axis_min = ax.get_ylim()[0]  # Minimum y-axis value (usually 0)
@@ -3048,6 +3107,10 @@ class spacrGraph:
        else:
            raise ValueError(f"Unknown graph type: {self.graph_type}")

+        if len(self.data_column) == 1:
+            num_groups = len(self.df[self.grouping_column].unique())
+            self._standerdize_figure_format(ax=ax, num_groups=num_groups, graph_type=self.graph_type)
+
        # Set y-axis start
        if isinstance(self.y_lim, list):
            if len(self.y_lim) == 2:
@@ -3082,7 +3145,73 @@ class spacrGraph:
        if self.save:
            self._save_results()

-        ax.margins(x=0.12)
+        ax.margins(x=0.12)
+
+    def _standerdize_figure_format(self, ax, num_groups, graph_type):
+        """
+        Adjusts the figure layout (size, bar width, jitter, and spacing) based on the number of groups.
+
+        Parameters:
+        - ax: The matplotlib Axes object.
+        - num_groups: Number of unique groups.
+        - graph_type: The type of graph (e.g., 'bar', 'jitter', 'box', etc.).
+
+        Returns:
+        - None. Modifies the figure and Axes in place.
+        """
+        if graph_type in ['line', 'line_std']:
+            print("Skipping layout adjustment for line graphs.")
+            return  # Skip layout adjustment for line graphs
+
+        correction_factor = 4
+
+        # Set figure size to ensure it remains square with a minimum size
+        fig_size = max(6, num_groups * 2) / correction_factor
+        ax.figure.set_size_inches(fig_size, fig_size)
+
+        # Configure layout based on the number of groups
+        bar_width = min(0.8, 1.5 / num_groups) / correction_factor
+        jitter_amount = min(0.1, 0.2 / num_groups) / correction_factor
+        jitter_size = max(50 / num_groups, 200)
+
+        # Adjust axis limits to ensure bars are centered with respect to group labels
+        ax.set_xlim(-0.5, num_groups - 0.5)
+
+        # Set ticks to match the group labels in your DataFrame
+        group_labels = self.df[self.grouping_column].unique()
+        ax.set_xticks(range(len(group_labels)))
+        ax.set_xticklabels(group_labels, rotation=45, ha='right')
+
+        # Customize elements based on the graph type
+        if graph_type == 'bar':
+            # Adjust bars' width and position
+            for bar in ax.patches:
+                bar.set_width(bar_width)
+                bar.set_x(bar.get_x() - bar_width / 2)
+
+        elif graph_type in ['jitter', 'jitter_bar', 'jitter_box']:
+            # Adjust jitter points' position and size
+            for coll in ax.collections:
+                offsets = coll.get_offsets()
+                offsets[:, 0] += jitter_amount  # Shift jitter points slightly
+                coll.set_offsets(offsets)
+                coll.set_sizes([jitter_size] * len(offsets))  # Adjust point size dynamically
+
+        elif graph_type in ['box', 'violin']:
+            # Adjust box width for consistent spacing
+            for artist in ax.artists:
+                artist.set_width(bar_width)
+
+        # Adjust legend and axis labels
+        ax.tick_params(axis='x', labelsize=max(10, 15 - num_groups // 2))
+        ax.tick_params(axis='y', labelsize=max(10, 15 - num_groups // 2))
+
+        if ax.get_legend():
+            ax.get_legend().set_bbox_to_anchor((1.05, 1))  #loc='upper left',borderaxespad=0.
+            ax.get_legend().prop.set_size(max(8, 12 - num_groups // 3))
+
+        # Redraw the figure to apply changes
+        ax.figure.canvas.draw()

    def _create_bar_plot(self, ax):
        """Helper method to create a bar plot with consistent bar thickness and centered error bars."""
@@ -3301,11 +3430,11 @@ class spacrGraph:
            bar.set_x(bar.get_x() - target_width / 2)

        # Adjust error bars alignment with bars
-        bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
-        for bar, (_, row) in zip(bars, summary_df.iterrows()):
-
-
-
+        #bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
+        #for bar, (_, row) in zip(bars, summary_df.iterrows()):
+        #    x_bar = bar.get_x() + bar.get_width() / 2
+        #    err = row[self.error_bar_type]
+        #    ax.errorbar(x=x_bar, y=bar.get_height(), yerr=err, fmt='none', c='black', capsize=5, lw=2)

        # Set legend and labels
        ax.set_xlabel(self.grouping_column)
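For context on the new scikit-posthocs dependency, posthoc_dunn returns a symmetric groups-by-groups DataFrame of adjusted p-values, which is what the np.triu_indices_from loop above walks. A minimal, self-contained sketch with synthetic data (not spacr code):

import pandas as pd
import scikit_posthocs as sp

# synthetic long-format data: one value column, one grouping column
df = pd.DataFrame({
    'value': [1.1, 1.3, 0.9, 2.4, 2.1, 2.6, 3.0, 3.2, 2.9],
    'group': ['a'] * 3 + ['b'] * 3 + ['c'] * 3,
})

# pairwise Dunn's test; each cell holds the adjusted p-value for one group pair
pvals = sp.posthoc_dunn(df, val_col='value', group_col='group', p_adjust='holm')
print(pvals)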
spacr/settings.py
CHANGED
@@ -246,7 +246,7 @@ def get_measure_crop_settings(settings={}):
    settings.setdefault('normalize_by','png')
    settings.setdefault('crop_mode',['cell'])
    settings.setdefault('dialate_pngs', False)
-    settings.setdefault('dialate_png_ratios', [0.2
+    settings.setdefault('dialate_png_ratios', [0.2])

    # Timelapsed settings
    settings.setdefault('timelapse', False)
@@ -697,16 +697,6 @@ expected_types = {
    "overlay_chans": list,
    "overlay": bool,
    "normalization_percentiles": list,
-    "print_object_number": bool,
-    "nr": int,
-    "figuresize": int,
-    "cmap": str,
-    "test_mode": bool,
-    "test_images": int,
-    "remove_background_cell": bool,
-    "remove_background_nucleus": bool,
-    "remove_background_pathogen": bool,
-    "pathogen_model": (str, type(None)),
    "filter": bool,
    "fill_in":bool,
    "upscale": bool,
@@ -825,18 +815,6 @@ expected_types = {
    "transform": (str, type(None)),
    "agg_type": str,
    "min_cell_count": int,
-    "regression_type": str,
-    "random_row_column_effects": bool,
-    "alpha": float,
-    "fraction_threshold": float,
-    "class_1_threshold": (float, type(None)),
-    "batch_size": int,
-    "CP_prob": float,
-    "flow_threshold": float,
-    "percentiles": (list, type(None)),
-    "invert": bool,
-    "diameter": int,
-    "grayscale": bool,
    "resize": bool,
    "target_height": (int, type(None)),
    "target_width": (int, type(None)),
@@ -881,9 +859,6 @@ expected_types = {
    "metadata_type_by":str,
    "custom_measurement":str,
    "custom_model":bool,
-    "size":int,
-    "test_split":float,
-    "class_metadata":list, # This is a list of lists
    "png_type":str,
    "custom_model_path":str,
    "generate_training_dataset":bool,
@@ -894,6 +869,7 @@ expected_types = {
    "correlate":bool,
    "target_layer":str,
    "save_to_db":bool,
+    "test_mode":bool,
    "normalize_input":bool,
 }

@@ -904,7 +880,7 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
    "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
    "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
    "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
-    "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "
+    "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "normalize", "use_bounding_box"],
    "Sequencing": ["signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
    "Generate Dataset":["save_to_db","file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
    "Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
@@ -939,6 +915,9 @@ def check_settings(vars_dict, expected_types, q=None):
            continue

        value = var.get()
+        if value == 'None':
+            value = None
+
        expected_type = expected_types.get(key, str)

        try:
@@ -953,14 +932,19 @@ def check_settings(vars_dict, expected_types, q=None):
                    # settings[key] = None
                else:
                    raise ValueError("Invalid format for list or list of lists")
+
            elif expected_type == list:
                settings[key] = parse_list(value) if value else None
+
+                if isinstance(settings[key], list) and len(settings[key]) == 1:
+                    settings[key] = settings[key][0]
+
            elif expected_type == bool:
                settings[key] = value if isinstance(value, bool) else value.lower() in ['true', '1', 't', 'y', 'yes']
            elif expected_type == (int, type(None)):
-                settings[key] = int(value) if value else None
+                settings[key] = settings[key] = int(value) if isinstance(value, int) or str(value).isdigit() else None
            elif expected_type == (float, type(None)):
-                settings[key] = float(value) if value else None
+                settings[key] = float(value) if isinstance(value, float) or (isinstance(value, str) and value.replace(".", "", 1).isdigit()) else None
            elif expected_type == (int, float):
                settings[key] = float(value) if '.' in value else int(value)
            elif expected_type == (str, type(None)):
@@ -1000,7 +984,7 @@ def check_settings(vars_dict, expected_types, q=None):
                settings[key] = expected_type(value) if value else None
        except (ValueError, SyntaxError) as e:
            expected_type_name = ' or '.join([t.__name__ for t in expected_type]) if isinstance(expected_type, tuple) else expected_type.__name__
-            q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}")
+            q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}, Value entered: {value}")
            return

    return settings
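A standalone sketch (not spacr code) of how the revised numeric branches in check_settings treat GUI strings: digit strings become numbers, while anything else, including the literal 'None' handled earlier, falls back to None:

def coerce_optional_int(value):
    # mirrors the new (int, type(None)) branch shown above
    return int(value) if isinstance(value, int) or str(value).isdigit() else None

def coerce_optional_float(value):
    # mirrors the new (float, type(None)) branch shown above
    return float(value) if isinstance(value, float) or (isinstance(value, str) and value.replace(".", "", 1).isdigit()) else None

print(coerce_optional_int("12"))      # 12
print(coerce_optional_int("None"))    # None
print(coerce_optional_float("0.25"))  # 0.25
print(coerce_optional_float(""))      # None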
spacr/toxo.py
CHANGED
@@ -10,6 +10,17 @@ from matplotlib.legend import Legend
 from matplotlib.transforms import Bbox
 from brokenaxes import brokenaxes

+import os
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+from scipy.spatial.distance import cosine
+from scipy.stats import pearsonr
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import mean_absolute_error
+

 from matplotlib.gridspec import GridSpec

@@ -450,4 +461,172 @@ def plot_gene_heatmaps(data, gene_list, columns, x_column='Gene ID', normalize=F
        plt.savefig(save_path, format='pdf', dpi=600, bbox_inches='tight')
        print(f"Figure saved to {save_path}")

-    plt.show()
+    plt.show()
+
+def generate_score_heatmap(settings):
+
+    def group_cv_score(csv, plate=1, column='c3', data_column='pred'):
+
+        df = pd.read_csv(csv)
+        if 'col' in df.columns:
+            df = df[df['col']==column]
+        elif 'column' in df.columns:
+            df['col'] = df['column']
+            df = df[df['col']==column]
+        if not plate is None:
+            df['plate'] = f"plate{plate}"
+        grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
+        grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row'].astype(str) + '_' + grouped_df['col'].astype(str)
+        return grouped_df
+
+    def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
+        df = pd.read_csv(csv)
+        df = df[df['column_name']==column]
+        if plate not in df.columns:
+            df['plate'] = f"plate{plate}"
+        df = df[df['grna_name'].str.match(f'^{control_sgrnas[0]}$|^{control_sgrnas[1]}$')]
+        grouped_df = df.groupby(['plate', 'row_name', 'column_name'])['count'].sum().reset_index()
+        grouped_df = grouped_df.rename(columns={'count': 'total_count'})
+        merged_df = pd.merge(df, grouped_df, on=['plate', 'row_name', 'column_name'])
+        merged_df['fraction'] = merged_df['count'] / merged_df['total_count']
+        merged_df['prc'] = merged_df['plate'].astype(str) + '_' + merged_df['row_name'].astype(str) + '_' + merged_df['column_name'].astype(str)
+        return merged_df
+
+    def plot_multi_channel_heatmap(df, column='c3'):
+        """
+        Plot a heatmap with multiple channels as columns.
+
+        Parameters:
+        - df: DataFrame with scores for different channels.
+        - column: Column to filter by (default is 'c3').
+        """
+        # Extract row number and convert to integer for sorting
+        df['row_num'] = df['row'].str.extract(r'(\d+)').astype(int)
+
+        # Filter and sort by plate, row, and column
+        df = df[df['col'] == column]
+        df = df.sort_values(by=['plate', 'row_num', 'col'])
+
+        # Drop temporary 'row_num' column after sorting
+        df = df.drop('row_num', axis=1)
+
+        # Create a new column combining plate, row, and column for the index
+        df['plate_row_col'] = df['plate'] + '-' + df['row'] + '-' + df['col']
+
+        # Set 'plate_row_col' as the index
+        df.set_index('plate_row_col', inplace=True)
+
+        # Extract only numeric data for the heatmap
+        heatmap_data = df.select_dtypes(include=[float, int])
+
+        # Plot heatmap with square boxes, no annotations, and 'viridis' colormap
+        plt.figure(figsize=(12, 8))
+        sns.heatmap(
+            heatmap_data,
+            cmap="viridis",
+            cbar=True,
+            square=True,
+            annot=False
+        )
+
+        plt.title("Heatmap of Prediction Scores for All Channels")
+        plt.xlabel("Channels")
+        plt.ylabel("Plate-Row-Column")
+        plt.tight_layout()
+
+        # Save the figure object and return it
+        fig = plt.gcf()
+        plt.show()
+
+        return fig
+
+
+    def combine_classification_scores(folders, csv_name, data_column, plate=1, column='c3'):
+        # Ensure `folders` is a list
+        if isinstance(folders, str):
+            folders = [folders]
+
+        ls = []  # Initialize ls to store found CSV file paths
+
+        # Iterate over the provided folders
+        for folder in folders:
+            sub_folders = os.listdir(folder)  # Get sub-folder list
+            for sub_folder in sub_folders:  # Iterate through sub-folders
+                path = os.path.join(folder, sub_folder)  # Join the full path
+
+                if os.path.isdir(path):  # Check if it's a directory
+                    csv = os.path.join(path, csv_name)  # Join path to the CSV file
+                    if os.path.exists(csv):  # If CSV exists, add to list
+                        ls.append(csv)
+                    else:
+                        print(f'No such file: {csv}')
+
+        # Initialize combined DataFrame
+        combined_df = None
+        print(f'Found {len(ls)} CSV files')
+
+        # Loop through all collected CSV files and process them
+        for csv_file in ls:
+            df = pd.read_csv(csv_file)  # Read CSV into DataFrame
+            df = df[df['col']==column]
+            if not plate is None:
+                df['plate'] = f"plate{plate}"
+            # Group the data by 'plate', 'row', and 'col'
+            grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
+            # Use the CSV filename to create a new column name
+            folder_name = os.path.dirname(csv_file).replace(".csv", "")
+            new_column_name = os.path.basename(f"{folder_name}_{data_column}")
+            print(new_column_name)
+            grouped_df = grouped_df.rename(columns={data_column: new_column_name})
+
+            # Merge into the combined DataFrame
+            if combined_df is None:
+                combined_df = grouped_df
+            else:
+                combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row', 'col'], how='outer')
+        combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row'].astype(str) + '_' + combined_df['col'].astype(str)
+        return combined_df
+
+    def calculate_mae(df):
+        """
+        Calculate the MAE between each channel's predictions and the fraction column for all rows.
+        """
+        # Extract numeric columns excluding 'fraction' and 'prc'
+        channels = df.drop(columns=['fraction', 'prc']).select_dtypes(include=[float, int])
+
+        mae_data = []
+
+        # Compute MAE for each channel with 'fraction' for all rows
+        for column in channels.columns:
+            for index, row in df.iterrows():
+                mae = mean_absolute_error([row['fraction']], [row[column]])
+                mae_data.append({'Channel': column, 'MAE': mae, 'Row': row['prc']})
+
+        # Convert the list of dictionaries to a DataFrame
+        mae_df = pd.DataFrame(mae_data)
+        return mae_df
+
+    result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plate'], settings['column'], )
+    df = calculate_fraction_mixed_condition(settings['csv'], settings['plate'], settings['column'], settings['control_sgrnas'])
+    df = df[df['grna_name']==settings['fraction_grna']]
+    fraction_df = df[['fraction', 'prc']]
+    merged_df = pd.merge(fraction_df, result_df, on=['prc'])
+    cv_df = group_cv_score(settings['cv_csv'], settings['plate'], settings['column'], settings['data_column_cv'])
+    cv_df = cv_df[[settings['data_column_cv'], 'prc']]
+    merged_df = pd.merge(merged_df, cv_df, on=['prc'])
+
+    fig = plot_multi_channel_heatmap(merged_df, settings['column'])
+    if 'row_number' in merged_df.columns:
+        merged_df = merged_df.drop('row_num', axis=1)
+    mae_df = calculate_mae(merged_df)
+    if 'row_number' in mae_df.columns:
+        mae_df = mae_df.drop('row_num', axis=1)
+
+    if not settings['dst'] is None:
+        mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plate']}.csv")
+        merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}_data.csv")
+        heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}.pdf")
+        mae_df.to_csv(mae_dst, index=False)
+        merged_df.to_csv(merged_dst, index=False)
+        fig.savefig(heatmap_save, format='pdf', dpi=600, bbox_inches='tight')
+    return merged_df
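The new generate_score_heatmap is driven entirely by a settings dict; the keys below are the ones read in the function body above, while the paths, file names, and the fraction_grna value are placeholders to be replaced with real data:

from spacr.toxo import generate_score_heatmap

settings = {
    'folders': ['/path/to/classification_runs'],   # placeholder: folder(s) whose sub-folders contain csv_name
    'csv_name': 'results.csv',                     # placeholder: per-run CSV file name
    'data_column': 'pred',                         # prediction-score column to average per well
    'plate': 1,
    'column': 'c3',
    'csv': '/path/to/grna_counts.csv',             # placeholder: CSV with grna_name and count per well
    'control_sgrnas': ['TGGT1_220950_1', 'TGGT1_233460_4'],
    'fraction_grna': 'TGGT1_233460_4',             # assumed: one of the control sgRNAs
    'cv_csv': '/path/to/cv_scores.csv',            # placeholder: cross-validation score CSV
    'data_column_cv': 'pred',
    'dst': None,                                   # set to a directory to also save the CSVs and the PDF heatmap
}

merged = generate_score_heatmap(settings)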
spacr/utils.py
CHANGED
@@ -5232,4 +5232,27 @@ def control_filelist(folder, mode='column', values=['01','02']):
    filtered_files = [file for file in files if file.split('_')[1][1:] in values]
    if mode is 'row':
        filtered_files = [file for file in files if file.split('_')[1][:1] in values]
-    return filtered_files
+    return filtered_files
+
+def choose_p_adjust_method(num_groups, num_data_points):
+    """
+    Selects the most appropriate p-value adjustment method based on data characteristics.
+
+    Parameters:
+    - num_groups: Number of unique groups being compared
+    - num_data_points: Number of data points per group (assuming balanced groups)
+
+    Returns:
+    - A string representing the recommended p-adjustment method
+    """
+    num_comparisons = (num_groups * (num_groups - 1)) // 2  # Number of pairwise comparisons
+
+    # Decision logic for choosing the adjustment method
+    if num_comparisons <= 10 and num_data_points > 5:
+        return 'holm'  # Balanced between power and Type I error control
+    elif num_comparisons > 10 and num_data_points <= 5:
+        return 'fdr_bh'  # FDR control for large number of comparisons and small sample size
+    elif num_comparisons <= 10:
+        return 'sidak'  # Less conservative than Bonferroni, good for independent comparisons
+    else:
+        return 'bonferroni'  # Very conservative, use for strict control of Type I errors
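Given the thresholds above, the returned method depends only on the number of pairwise comparisons and the per-group sample size, for example:

from spacr.utils import choose_p_adjust_method

print(choose_p_adjust_method(num_groups=4, num_data_points=8))    # 6 comparisons, >5 points per group -> 'holm'
print(choose_p_adjust_method(num_groups=12, num_data_points=3))   # 66 comparisons, <=5 points         -> 'fdr_bh'
print(choose_p_adjust_method(num_groups=4, num_data_points=3))    # 6 comparisons, <=5 points          -> 'sidak'
print(choose_p_adjust_method(num_groups=12, num_data_points=10))  # 66 comparisons, >5 points          -> 'bonferroni'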
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.50
+Version: 0.3.52
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson
@@ -16,6 +16,7 @@ Requires-Dist: scipy<2.0,>=1.12.0
 Requires-Dist: cellpose<4.0,>=3.0.6
 Requires-Dist: scikit-image<1.0,>=0.22.0
 Requires-Dist: scikit-learn<2.0,>=1.4.1
+Requires-Dist: scikit-posthocs<0.20,>=0.10.0
 Requires-Dist: mahotas<2.0,>=1.4.13
 Requires-Dist: btrack<1.0,>=0.6.5
 Requires-Dist: trackpy<1.0,>=0.6.2
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/RECORD
CHANGED
@@ -14,21 +14,21 @@ spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
 spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
 spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
 spacr/gui_elements.py,sha256=Dr9KEek41LggJ2z2zfh28a7w86sZXg1jzF388rF2BT4,138249
-spacr/gui_utils.py,sha256=
+spacr/gui_utils.py,sha256=aOVI2G71pObolEjuSgAKr86XJZtN98uqqBfCbucMhRQ,45560
 spacr/io.py,sha256=1rIdJ_8dyn7W4D2zXjaOqlgyo_Y5Z7X86aRp4hNYWCU,144194
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
-spacr/measure.py,sha256=
+spacr/measure.py,sha256=euywVbBXRllht-frnxy9QHV76i8HkPPjm3dJw46kcz0,55085
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
 spacr/ml.py,sha256=Oykp3drBxZrcwrWQh2n6Xt1OzZER6pSIiaR-W0GO2_E,67353
 spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
-spacr/plot.py,sha256=
+spacr/plot.py,sha256=B4nxp7NFyM6MmtarW9zbxydeEvVIMgPqHEElkCIycpA,169566
 spacr/sequencing.py,sha256=HDpF_C3hRd-fk6ZENPmI3vgYoom3HIvaeIIZWLhaIAY,25037
-spacr/settings.py,sha256=
+spacr/settings.py,sha256=2Er3G6ApcemIEhyQCsF8zmVdBW8KiKf4nkSismu03s8,77358
 spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
 spacr/submodules.py,sha256=3C5M4UbI9Ral1MX4PTpucaAaqhL3RADuCOCqaHhMyUg,28048
 spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
-spacr/toxo.py,sha256=
-spacr/utils.py,sha256=
+spacr/toxo.py,sha256=bcH5Ug6xtf3BBxvOFc8e4Jc5R3f6-C46oHhgiSoYl3U,24935
+spacr/utils.py,sha256=clrjlUOhY-LQH0bTIiNJ9VMBEU9qlRbTvoU7-yNLzsg,222475
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -151,9 +151,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
+spacr-0.3.52.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.52.dist-info/METADATA,sha256=I0OCX9zCPvfgnnGS2JYaV-vdNvirglSfO6IzX68s8uI,6032
+spacr-0.3.52.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.52.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.52.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.52.dist-info/RECORD,,
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/LICENSE
File without changes
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/WHEEL
File without changes
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/entry_points.txt
File without changes
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/top_level.txt
File without changes