spacr 0.3.50__py3-none-any.whl → 0.3.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/gui_utils.py CHANGED
@@ -76,8 +76,8 @@ def load_app(root, app_name, app_func):
  root.current_app_exit_func()
  else:
  proceed_with_app(root, app_name, app_func)
-
- def parse_list_v1(value):
+
+ def parse_list(value):
  """
  Parses a string representation of a list and returns the parsed list.

@@ -85,7 +85,7 @@ def parse_list_v1(value):
  value (str): The string representation of the list.

  Returns:
- list: The parsed list.
+ list: The parsed list, which can contain integers, floats, or strings.

  Raises:
  ValueError: If the input value is not a valid list format or contains mixed types or unsupported types.
@@ -93,21 +93,20 @@ def parse_list_v1(value):
  try:
  parsed_value = ast.literal_eval(value)
  if isinstance(parsed_value, list):
- # Check if the list elements are homogeneous (all int or all str)
- if all(isinstance(item, int) for item in parsed_value):
- return parsed_value
- elif all(isinstance(item, str) for item in parsed_value):
- return parsed_value
- elif all(isinstance(item, float) for item in parsed_value):
+ # Check if all elements are homogeneous (either all int, float, or str)
+ if all(isinstance(item, (int, float, str)) for item in parsed_value):
  return parsed_value
  else:
  raise ValueError("List contains mixed types or unsupported types")
+ elif isinstance(parsed_value, tuple):
+ # Convert tuple to list if it’s a single-element tuple
+ return list(parsed_value) if len(parsed_value) > 1 else [parsed_value[0]]
  else:
  raise ValueError(f"Expected a list but got {type(parsed_value).__name__}")
  except (ValueError, SyntaxError) as e:
  raise ValueError(f"Invalid format for list: {value}. Error: {e}")
-
- def parse_list(value):
+
+ def parse_list_v1(value):
  """
  Parses a string representation of a list and returns the parsed list.

@@ -391,7 +390,7 @@ def convert_settings_dict_for_gui(settings):
  'nucleus_chann_dim': ('combo', chans, None),
  'pathogen_mask_dim': ('combo', chans, None),
  'pathogen_chann_dim': ('combo', chans, None),
- 'crop_mode': ('combo', ['cell', 'nucleus', 'pathogen', '[cell, nucleus, pathogen]', '[cell,nucleus, pathogen]'], ['cell']),
+ 'crop_mode': ('combo', [['cell'], ['nucleus'], ['pathogen'], ['cell', 'nucleus'], ['cell', 'pathogen'], ['nucleus', 'pathogen'], ['cell', 'nucleus', 'pathogen']], ['cell']),
  'magnification': ('combo', [20, 40, 60], 20),
  'nucleus_channel': ('combo', chans_v2, None),
  'cell_channel': ('combo', chans_v2, None),
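
Taken together, the gui_utils.py changes above mean the crop_mode combo now offers real Python lists and the renamed parse_list converts the corresponding string back into a list via ast.literal_eval. A minimal standalone sketch of that round trip (the helper name and sample inputs are illustrative, not spacr API):

import ast

def parse_list_sketch(value):
    # Safely evaluate the string, accept lists of int/float/str elements, and coerce tuples to lists.
    parsed = ast.literal_eval(value)
    if isinstance(parsed, tuple):
        return list(parsed)
    if isinstance(parsed, list) and all(isinstance(i, (int, float, str)) for i in parsed):
        return parsed
    raise ValueError(f"Expected a list but got {type(parsed).__name__}")

print(parse_list_sketch("['cell']"))             # ['cell']
print(parse_list_sketch("['cell', 'nucleus']"))  # ['cell', 'nucleus']
print(parse_list_sketch("(0.2,)"))               # [0.2]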
spacr/measure.py CHANGED
@@ -1028,7 +1028,10 @@ def measure_crop(settings):
  return

  if not isinstance(settings['crop_mode'], list):
- print(f"WARNING: crop_mode should be a list with at least one element e.g. ['cell'] or ['cell','nucleus'] or [None]")
+ print(f"WARNING: crop_mode should be a list with at least one element e.g. ['cell'] or ['cell','nucleus'] or [None] got: {settings['crop_mode']}")
+ settings['crop_mode'] = [settings['crop_mode']]
+ settings['crop_mode'] = [str(crop_mode) for crop_mode in settings['crop_mode']]
+ print(f"Converted crop_mode to list: {settings['crop_mode']}")
  return

  _save_settings_to_db(settings)
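
The measure_crop guard above now coerces a bare crop_mode value into a list of strings before returning. A minimal sketch of the same normalization on a hypothetical settings dict:

settings = {'crop_mode': 'cell'}  # a string was passed where a list is expected
if not isinstance(settings['crop_mode'], list):
    settings['crop_mode'] = [settings['crop_mode']]                  # wrap the value in a list
    settings['crop_mode'] = [str(m) for m in settings['crop_mode']]  # force string elements
print(settings['crop_mode'])  # ['cell']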
spacr/plot.py CHANGED
@@ -16,6 +16,7 @@ from skimage import measure
  from skimage.measure import find_contours, label, regionprops
  from skimage.segmentation import mark_boundaries
  from skimage.transform import resize as sk_resize
+ import scikit_posthocs as sp

  import tifffile as tiff

@@ -2844,9 +2845,13 @@ class spacrGraph:
  len(self.df[self.df[self.grouping_column] == unique_groups[1]])})

  return test_results
-
+
  def perform_posthoc_tests(self, is_normal, unique_groups):
  """Perform post-hoc tests for multiple groups based on all_to_all flag."""
+
+ from .utils import choose_p_adjust_method
+
+ posthoc_results = []
  if is_normal and len(unique_groups) > 2 and self.all_to_all:
  tukey_result = pairwise_tukeyhsd(self.df[self.data_column], self.df[self.grouping_column], alpha=0.05)
  posthoc_results = []
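
The parametric branch kept above relies on statsmodels' pairwise_tukeyhsd. A standalone sketch of that call on made-up data, using the same alpha of 0.05:

import numpy as np
from statsmodels.stats.multicomp import pairwise_tukeyhsd

values = np.array([1.2, 1.4, 1.1, 2.8, 3.0, 2.9, 5.1, 4.8, 5.3])
groups = np.array(['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'])

tukey = pairwise_tukeyhsd(values, groups, alpha=0.05)
print(tukey.summary())   # one row per pairwise comparison
print(tukey.pvalues)     # adjusted p-values in the same order as the summary rows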
@@ -2862,22 +2867,40 @@ class spacrGraph:
  'n_object': len(raw_data1) + len(raw_data2),
  'n_well': len(self.df[self.df[self.grouping_column] == comparison[0]]) + len(self.df[self.df[self.grouping_column] == comparison[1]])})
  return posthoc_results
-
- elif len(unique_groups) > 2 and not self.all_to_all and self.compare_group:
- dunn_result = pg.pairwise_tests(data=self.df, dv=self.data_column, between=self.grouping_column, padjust='bonf', test='dunn')
- posthoc_results = []
- for idx, row in dunn_result.iterrows():
- if row['A'] == self.compare_group or row['B'] == self.compare_group:
- posthoc_results.append({
- 'Comparison': f"{row['A']} vs {row['B']}",
- 'Test Statistic': row['T'], # Test statistic from Dunn's test
- 'p-value': row['p-val'],
- 'Test Name': 'Dunn’s Post-hoc',
- 'n_object': None,
- 'n_well': None})
-
+
+ elif len(unique_groups) > 2 and self.all_to_all:
+ print('performing_dunns')
+
+ # Prepare data for Dunn's test in long format
+ long_data = self.df[[self.data_column[0], self.grouping_column]].dropna()
+
+ p_adjust_method = choose_p_adjust_method(num_groups=len(long_data[self.grouping_column].unique()),num_data_points=len(long_data) // len(long_data[self.grouping_column].unique()))
+
+ # Perform Dunn's test with Bonferroni correction
+ dunn_result = sp.posthoc_dunn(
+ long_data,
+ val_col=self.data_column[0],
+ group_col=self.grouping_column,
+ p_adjust=p_adjust_method
+ )
+
+ for group_a, group_b in zip(*np.triu_indices_from(dunn_result, k=1)):
+ raw_data1 = self.raw_df[self.raw_df[self.grouping_column] == dunn_result.index[group_a]][self.data_column]
+ raw_data2 = self.raw_df[self.raw_df[self.grouping_column] == dunn_result.columns[group_b]][self.data_column]
+
+ posthoc_results.append({
+ 'Comparison': f"{dunn_result.index[group_a]} vs {dunn_result.columns[group_b]}",
+ 'Test Statistic': None, # Dunn's test does not return a specific test statistic
+ 'p-value': dunn_result.iloc[group_a, group_b], # Extract the p-value from the matrix
+ 'Test Name': "Dunn's Post-hoc",
+ 'p_adjust_method': p_adjust_method,
+ 'n_object': len(raw_data1) + len(raw_data2), # Total objects
+ 'n_well': len(self.df[self.df[self.grouping_column] == dunn_result.index[group_a]]) +
+ len(self.df[self.grouping_column] == dunn_result.columns[group_b])})
+
  return posthoc_results
- return []
+
+ return posthoc_results

  def create_plot(self, ax=None):
  """Create and display the plot based on the chosen graph type."""
@@ -2913,7 +2936,43 @@ class spacrGraph:
  transposed_table = list(map(list, zip(*table_data)))
  return row_labels, transposed_table

- def _place_symbols(row_labels, transposed_table, x_positions, ax):
+
+ def _place_symbols(row_labels, transposed_table, x_positions, ax):
+ """
+ Places symbols and row labels aligned under the bars or jitter points on the graph.
+
+ Parameters:
+ - row_labels: List of row titles to be displayed along the y-axis.
+ - transposed_table: Data to be placed under each bar/jitter as symbols.
+ - x_positions: X-axis positions for each group to align the symbols.
+ - ax: The matplotlib Axes object where the plot is drawn.
+ """
+ # Get plot dimensions and adjust for different plot sizes
+ y_axis_min = ax.get_ylim()[0] # Minimum y-axis value (usually 0)
+ symbol_start_y = y_axis_min - 0.05 * (ax.get_ylim()[1] - y_axis_min) # Adjust a bit below the x-axis
+
+ # Calculate spacing for the table rows (adjust as needed)
+ y_spacing = 0.04 # Adjust this for better spacing between rows
+
+ # Determine the leftmost x-position for row labels (align with the y-axis)
+ label_x_pos = ax.get_xlim()[0] - 0.3 # Adjust offset from the y-axis
+
+ # Place row labels vertically aligned with symbols
+ for row_idx, title in enumerate(row_labels):
+ y_pos = symbol_start_y - (row_idx * y_spacing) # Calculate vertical position for each label
+ ax.text(label_x_pos, y_pos, title, ha='right', va='center', fontsize=12, fontweight='regular')
+
+ # Place symbols under each bar or jitter point based on x-positions
+ for idx, (x_pos, column_data) in enumerate(zip(x_positions, transposed_table)):
+ for row_idx, text in enumerate(column_data):
+ y_pos = symbol_start_y - (row_idx * y_spacing) # Adjust vertical spacing for symbols
+ ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12, fontweight='regular')
+
+ # Redraw to apply changes
+ ax.figure.canvas.draw()
+
+
+ def _place_symbols_v1(row_labels, transposed_table, x_positions, ax):

  # Get the bottom of the y-axis (y=0) in data coordinates and convert to display coordinates
  y_axis_min = ax.get_ylim()[0] # Minimum y-axis value (usually 0)
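
Both _place_symbols variants write the significance table directly onto the Axes with ax.text anchored just below the x-axis. A minimal standalone sketch of that placement pattern (the bar heights and symbols are invented):

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.bar(['a', 'b', 'c'], [1.0, 2.5, 1.8])

# Anchor one row of symbols slightly below the bottom of the y-axis; Text artists are not clipped by default.
y_min, y_max = ax.get_ylim()
y_row = y_min - 0.05 * (y_max - y_min)
for x_pos, symbol in enumerate(['*', 'ns', '**']):
    ax.text(x_pos, y_row, symbol, ha='center', va='center', fontsize=12)

fig.canvas.draw()
plt.show()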
@@ -3048,6 +3107,10 @@ class spacrGraph:
  else:
  raise ValueError(f"Unknown graph type: {self.graph_type}")

+ if len(self.data_column) == 1:
+ num_groups = len(self.df[self.grouping_column].unique())
+ self._standerdize_figure_format(ax=ax, num_groups=num_groups, graph_type=self.graph_type)
+
  # Set y-axis start
  if isinstance(self.y_lim, list):
  if len(self.y_lim) == 2:
@@ -3082,7 +3145,73 @@ class spacrGraph:
  if self.save:
  self._save_results()

- ax.margins(x=0.12)
+ ax.margins(x=0.12)
+
+ def _standerdize_figure_format(self, ax, num_groups, graph_type):
+ """
+ Adjusts the figure layout (size, bar width, jitter, and spacing) based on the number of groups.
+
+ Parameters:
+ - ax: The matplotlib Axes object.
+ - num_groups: Number of unique groups.
+ - graph_type: The type of graph (e.g., 'bar', 'jitter', 'box', etc.).
+
+ Returns:
+ - None. Modifies the figure and Axes in place.
+ """
+ if graph_type in ['line', 'line_std']:
+ print("Skipping layout adjustment for line graphs.")
+ return # Skip layout adjustment for line graphs
+
+ correction_factor = 4
+
+ # Set figure size to ensure it remains square with a minimum size
+ fig_size = max(6, num_groups * 2) / correction_factor
+ ax.figure.set_size_inches(fig_size, fig_size)
+
+ # Configure layout based on the number of groups
+ bar_width = min(0.8, 1.5 / num_groups) / correction_factor
+ jitter_amount = min(0.1, 0.2 / num_groups) / correction_factor
+ jitter_size = max(50 / num_groups, 200)
+
+ # Adjust axis limits to ensure bars are centered with respect to group labels
+ ax.set_xlim(-0.5, num_groups - 0.5)
+
+ # Set ticks to match the group labels in your DataFrame
+ group_labels = self.df[self.grouping_column].unique()
+ ax.set_xticks(range(len(group_labels)))
+ ax.set_xticklabels(group_labels, rotation=45, ha='right')
+
+ # Customize elements based on the graph type
+ if graph_type == 'bar':
+ # Adjust bars' width and position
+ for bar in ax.patches:
+ bar.set_width(bar_width)
+ bar.set_x(bar.get_x() - bar_width / 2)
+
+ elif graph_type in ['jitter', 'jitter_bar', 'jitter_box']:
+ # Adjust jitter points' position and size
+ for coll in ax.collections:
+ offsets = coll.get_offsets()
+ offsets[:, 0] += jitter_amount # Shift jitter points slightly
+ coll.set_offsets(offsets)
+ coll.set_sizes([jitter_size] * len(offsets)) # Adjust point size dynamically
+
+ elif graph_type in ['box', 'violin']:
+ # Adjust box width for consistent spacing
+ for artist in ax.artists:
+ artist.set_width(bar_width)
+
+ # Adjust legend and axis labels
+ ax.tick_params(axis='x', labelsize=max(10, 15 - num_groups // 2))
+ ax.tick_params(axis='y', labelsize=max(10, 15 - num_groups // 2))
+
+ if ax.get_legend():
+ ax.get_legend().set_bbox_to_anchor((1.05, 1)) #loc='upper left',borderaxespad=0.
+ ax.get_legend().prop.set_size(max(8, 12 - num_groups // 3))
+
+ # Redraw the figure to apply changes
+ ax.figure.canvas.draw()

  def _create_bar_plot(self, ax):
  """Helper method to create a bar plot with consistent bar thickness and centered error bars."""
@@ -3301,11 +3430,11 @@ class spacrGraph:
  bar.set_x(bar.get_x() - target_width / 2)

  # Adjust error bars alignment with bars
- bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
- for bar, (_, row) in zip(bars, summary_df.iterrows()):
- x_bar = bar.get_x() + bar.get_width() / 2
- err = row[self.error_bar_type]
- ax.errorbar(x=x_bar, y=bar.get_height(), yerr=err, fmt='none', c='black', capsize=5, lw=2)
+ #bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
+ #for bar, (_, row) in zip(bars, summary_df.iterrows()):
+ # x_bar = bar.get_x() + bar.get_width() / 2
+ # err = row[self.error_bar_type]
+ # ax.errorbar(x=x_bar, y=bar.get_height(), yerr=err, fmt='none', c='black', capsize=5, lw=2)

  # Set legend and labels
  ax.set_xlabel(self.grouping_column)
spacr/settings.py CHANGED
@@ -246,7 +246,7 @@ def get_measure_crop_settings(settings={}):
  settings.setdefault('normalize_by','png')
  settings.setdefault('crop_mode',['cell'])
  settings.setdefault('dialate_pngs', False)
- settings.setdefault('dialate_png_ratios', [0.2,0.2])
+ settings.setdefault('dialate_png_ratios', [0.2])

  # Timelapsed settings
  settings.setdefault('timelapse', False)
@@ -697,16 +697,6 @@ expected_types = {
  "overlay_chans": list,
  "overlay": bool,
  "normalization_percentiles": list,
- "print_object_number": bool,
- "nr": int,
- "figuresize": int,
- "cmap": str,
- "test_mode": bool,
- "test_images": int,
- "remove_background_cell": bool,
- "remove_background_nucleus": bool,
- "remove_background_pathogen": bool,
- "pathogen_model": (str, type(None)),
  "filter": bool,
  "fill_in":bool,
  "upscale": bool,
@@ -825,18 +815,6 @@ expected_types = {
  "transform": (str, type(None)),
  "agg_type": str,
  "min_cell_count": int,
- "regression_type": str,
- "random_row_column_effects": bool,
- "alpha": float,
- "fraction_threshold": float,
- "class_1_threshold": (float, type(None)),
- "batch_size": int,
- "CP_prob": float,
- "flow_threshold": float,
- "percentiles": (list, type(None)),
- "invert": bool,
- "diameter": int,
- "grayscale": bool,
  "resize": bool,
  "target_height": (int, type(None)),
  "target_width": (int, type(None)),
@@ -881,9 +859,6 @@ expected_types = {
  "metadata_type_by":str,
  "custom_measurement":str,
  "custom_model":bool,
- "size":int,
- "test_split":float,
- "class_metadata":list, # This is a list of lists
  "png_type":str,
  "custom_model_path":str,
  "generate_training_dataset":bool,
@@ -894,6 +869,7 @@ expected_types = {
  "correlate":bool,
  "target_layer":str,
  "save_to_db":bool,
+ "test_mode":bool,
  "normalize_input":bool,
  }

@@ -904,7 +880,7 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
  "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
  "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
  "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
- "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
+ "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "normalize", "use_bounding_box"],
  "Sequencing": ["signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
  "Generate Dataset":["save_to_db","file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
  "Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
@@ -939,6 +915,9 @@ def check_settings(vars_dict, expected_types, q=None):
  continue

  value = var.get()
+ if value == 'None':
+ value = None
+
  expected_type = expected_types.get(key, str)

  try:
@@ -953,14 +932,19 @@ def check_settings(vars_dict, expected_types, q=None):
  # settings[key] = None
  else:
  raise ValueError("Invalid format for list or list of lists")
+
  elif expected_type == list:
  settings[key] = parse_list(value) if value else None
+
+ if isinstance(settings[key], list) and len(settings[key]) == 1:
+ settings[key] = settings[key][0]
+
  elif expected_type == bool:
  settings[key] = value if isinstance(value, bool) else value.lower() in ['true', '1', 't', 'y', 'yes']
  elif expected_type == (int, type(None)):
- settings[key] = int(value) if value else None
+ settings[key] = settings[key] = int(value) if isinstance(value, int) or str(value).isdigit() else None
  elif expected_type == (float, type(None)):
- settings[key] = float(value) if value else None
+ settings[key] = float(value) if isinstance(value, float) or (isinstance(value, str) and value.replace(".", "", 1).isdigit()) else None
  elif expected_type == (int, float):
  settings[key] = float(value) if '.' in value else int(value)
  elif expected_type == (str, type(None)):
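
The (int, None) and (float, None) branches above now validate the widget string before casting instead of relying on truthiness, so 'None' and free text fall back to None. A minimal sketch of the same guards on standalone values (the helper names and inputs are illustrative):

def to_int_or_none(value):
    # Accept real ints or digit-only strings; anything else becomes None.
    return int(value) if isinstance(value, int) or str(value).isdigit() else None

def to_float_or_none(value):
    # Accept real floats or strings such as "0.2"; anything else becomes None.
    return float(value) if isinstance(value, float) or (isinstance(value, str) and value.replace(".", "", 1).isdigit()) else None

print(to_int_or_none("5"), to_int_or_none("abc"))     # 5 None
print(to_float_or_none("0.2"), to_float_or_none(""))  # 0.2 None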
@@ -1000,7 +984,7 @@ def check_settings(vars_dict, expected_types, q=None):
  settings[key] = expected_type(value) if value else None
  except (ValueError, SyntaxError) as e:
  expected_type_name = ' or '.join([t.__name__ for t in expected_type]) if isinstance(expected_type, tuple) else expected_type.__name__
- q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}")
+ q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}, Value entered: {value}")
  return

  return settings
spacr/toxo.py CHANGED
@@ -10,6 +10,17 @@ from matplotlib.legend import Legend
  from matplotlib.transforms import Bbox
  from brokenaxes import brokenaxes

+ import os
+ import pandas as pd
+ import seaborn as sns
+ import matplotlib.pyplot as plt
+ from scipy.spatial.distance import cosine
+ from scipy.stats import pearsonr
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from sklearn.metrics import mean_absolute_error
+

  from matplotlib.gridspec import GridSpec

@@ -450,4 +461,172 @@ def plot_gene_heatmaps(data, gene_list, columns, x_column='Gene ID', normalize=F
  plt.savefig(save_path, format='pdf', dpi=600, bbox_inches='tight')
  print(f"Figure saved to {save_path}")

- plt.show()
+ plt.show()
+
+ def generate_score_heatmap(settings):
+
+ def group_cv_score(csv, plate=1, column='c3', data_column='pred'):
+
+ df = pd.read_csv(csv)
+ if 'col' in df.columns:
+ df = df[df['col']==column]
+ elif 'column' in df.columns:
+ df['col'] = df['column']
+ df = df[df['col']==column]
+ if not plate is None:
+ df['plate'] = f"plate{plate}"
+ grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
+ grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row'].astype(str) + '_' + grouped_df['col'].astype(str)
+ return grouped_df
+
+ def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
+ df = pd.read_csv(csv)
+ df = df[df['column_name']==column]
+ if plate not in df.columns:
+ df['plate'] = f"plate{plate}"
+ df = df[df['grna_name'].str.match(f'^{control_sgrnas[0]}$|^{control_sgrnas[1]}$')]
+ grouped_df = df.groupby(['plate', 'row_name', 'column_name'])['count'].sum().reset_index()
+ grouped_df = grouped_df.rename(columns={'count': 'total_count'})
+ merged_df = pd.merge(df, grouped_df, on=['plate', 'row_name', 'column_name'])
+ merged_df['fraction'] = merged_df['count'] / merged_df['total_count']
+ merged_df['prc'] = merged_df['plate'].astype(str) + '_' + merged_df['row_name'].astype(str) + '_' + merged_df['column_name'].astype(str)
+ return merged_df
+
+ def plot_multi_channel_heatmap(df, column='c3'):
+ """
+ Plot a heatmap with multiple channels as columns.
+
+ Parameters:
+ - df: DataFrame with scores for different channels.
+ - column: Column to filter by (default is 'c3').
+ """
+ # Extract row number and convert to integer for sorting
+ df['row_num'] = df['row'].str.extract(r'(\d+)').astype(int)
+
+ # Filter and sort by plate, row, and column
+ df = df[df['col'] == column]
+ df = df.sort_values(by=['plate', 'row_num', 'col'])
+
+ # Drop temporary 'row_num' column after sorting
+ df = df.drop('row_num', axis=1)
+
+ # Create a new column combining plate, row, and column for the index
+ df['plate_row_col'] = df['plate'] + '-' + df['row'] + '-' + df['col']
+
+ # Set 'plate_row_col' as the index
+ df.set_index('plate_row_col', inplace=True)
+
+ # Extract only numeric data for the heatmap
+ heatmap_data = df.select_dtypes(include=[float, int])
+
+ # Plot heatmap with square boxes, no annotations, and 'viridis' colormap
+ plt.figure(figsize=(12, 8))
+ sns.heatmap(
+ heatmap_data,
+ cmap="viridis",
+ cbar=True,
+ square=True,
+ annot=False
+ )
+
+ plt.title("Heatmap of Prediction Scores for All Channels")
+ plt.xlabel("Channels")
+ plt.ylabel("Plate-Row-Column")
+ plt.tight_layout()
+
+ # Save the figure object and return it
+ fig = plt.gcf()
+ plt.show()
+
+ return fig
+
+
+ def combine_classification_scores(folders, csv_name, data_column, plate=1, column='c3'):
+ # Ensure `folders` is a list
+ if isinstance(folders, str):
+ folders = [folders]
+
+ ls = [] # Initialize ls to store found CSV file paths
+
+ # Iterate over the provided folders
+ for folder in folders:
+ sub_folders = os.listdir(folder) # Get sub-folder list
+ for sub_folder in sub_folders: # Iterate through sub-folders
+ path = os.path.join(folder, sub_folder) # Join the full path
+
+ if os.path.isdir(path): # Check if it’s a directory
+ csv = os.path.join(path, csv_name) # Join path to the CSV file
+ if os.path.exists(csv): # If CSV exists, add to list
+ ls.append(csv)
+ else:
+ print(f'No such file: {csv}')
+
+ # Initialize combined DataFrame
+ combined_df = None
+ print(f'Found {len(ls)} CSV files')
+
+ # Loop through all collected CSV files and process them
+ for csv_file in ls:
+ df = pd.read_csv(csv_file) # Read CSV into DataFrame
+ df = df[df['col']==column]
+ if not plate is None:
+ df['plate'] = f"plate{plate}"
+ # Group the data by 'plate', 'row', and 'col'
+ grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
+ # Use the CSV filename to create a new column name
+ folder_name = os.path.dirname(csv_file).replace(".csv", "")
+ new_column_name = os.path.basename(f"{folder_name}_{data_column}")
+ print(new_column_name)
+ grouped_df = grouped_df.rename(columns={data_column: new_column_name})
+
+ # Merge into the combined DataFrame
+ if combined_df is None:
+ combined_df = grouped_df
+ else:
+ combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row', 'col'], how='outer')
+ combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row'].astype(str) + '_' + combined_df['col'].astype(str)
+ return combined_df
+
+ def calculate_mae(df):
+ """
+ Calculate the MAE between each channel's predictions and the fraction column for all rows.
+ """
+ # Extract numeric columns excluding 'fraction' and 'prc'
+ channels = df.drop(columns=['fraction', 'prc']).select_dtypes(include=[float, int])
+
+ mae_data = []
+
+ # Compute MAE for each channel with 'fraction' for all rows
+ for column in channels.columns:
+ for index, row in df.iterrows():
+ mae = mean_absolute_error([row['fraction']], [row[column]])
+ mae_data.append({'Channel': column, 'MAE': mae, 'Row': row['prc']})
+
+ # Convert the list of dictionaries to a DataFrame
+ mae_df = pd.DataFrame(mae_data)
+ return mae_df
+
+ result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plate'], settings['column'], )
+ df = calculate_fraction_mixed_condition(settings['csv'], settings['plate'], settings['column'], settings['control_sgrnas'])
+ df = df[df['grna_name']==settings['fraction_grna']]
+ fraction_df = df[['fraction', 'prc']]
+ merged_df = pd.merge(fraction_df, result_df, on=['prc'])
+ cv_df = group_cv_score(settings['cv_csv'], settings['plate'], settings['column'], settings['data_column_cv'])
+ cv_df = cv_df[[settings['data_column_cv'], 'prc']]
+ merged_df = pd.merge(merged_df, cv_df, on=['prc'])
+
+ fig = plot_multi_channel_heatmap(merged_df, settings['column'])
+ if 'row_number' in merged_df.columns:
+ merged_df = merged_df.drop('row_num', axis=1)
+ mae_df = calculate_mae(merged_df)
+ if 'row_number' in mae_df.columns:
+ mae_df = mae_df.drop('row_num', axis=1)
+
+ if not settings['dst'] is None:
+ mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plate']}.csv")
+ merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}_data.csv")
+ heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}.pdf")
+ mae_df.to_csv(mae_dst, index=False)
+ merged_df.to_csv(merged_dst, index=False)
+ fig.savefig(heatmap_save, format='pdf', dpi=600, bbox_inches='tight')
+ return merged_df
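
The new generate_score_heatmap reads everything from a single settings dict; the keys below are the ones the function accesses. A hypothetical invocation (all paths and plate/column values are placeholders, not defaults shipped with spacr):

from spacr.toxo import generate_score_heatmap

settings = {
    'folders': '/path/to/classification_runs',  # folder(s) whose sub-folders each contain csv_name
    'csv_name': 'results.csv',                  # CSV looked up inside every sub-folder
    'data_column': 'pred',                      # score column averaged per plate/row/col
    'csv': '/path/to/grna_counts.csv',          # counts used for the control-sgRNA fraction
    'control_sgrnas': ['TGGT1_220950_1', 'TGGT1_233460_4'],
    'fraction_grna': 'TGGT1_233460_4',          # sgRNA whose fraction is compared against the scores
    'cv_csv': '/path/to/cv_predictions.csv',    # cross-validation scores
    'data_column_cv': 'pred',
    'plate': 1,
    'column': 'c3',
    'dst': None,                                # set to a directory to save the CSVs and the PDF
}
merged_df = generate_score_heatmap(settings)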
spacr/utils.py CHANGED
@@ -5232,4 +5232,27 @@ def control_filelist(folder, mode='column', values=['01','02']):
  filtered_files = [file for file in files if file.split('_')[1][1:] in values]
  if mode is 'row':
  filtered_files = [file for file in files if file.split('_')[1][:1] in values]
- return filtered_files
+ return filtered_files
+
+ def choose_p_adjust_method(num_groups, num_data_points):
+ """
+ Selects the most appropriate p-value adjustment method based on data characteristics.
+
+ Parameters:
+ - num_groups: Number of unique groups being compared
+ - num_data_points: Number of data points per group (assuming balanced groups)
+
+ Returns:
+ - A string representing the recommended p-adjustment method
+ """
+ num_comparisons = (num_groups * (num_groups - 1)) // 2 # Number of pairwise comparisons
+
+ # Decision logic for choosing the adjustment method
+ if num_comparisons <= 10 and num_data_points > 5:
+ return 'holm' # Balanced between power and Type I error control
+ elif num_comparisons > 10 and num_data_points <= 5:
+ return 'fdr_bh' # FDR control for large number of comparisons and small sample size
+ elif num_comparisons <= 10:
+ return 'sidak' # Less conservative than Bonferroni, good for independent comparisons
+ else:
+ return 'bonferroni' # Very conservative, use for strict control of Type I errors
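
choose_p_adjust_method returns the string that spacrGraph.perform_posthoc_tests passes straight to sp.posthoc_dunn in plot.py. A standalone sketch tying the two together (the data values are made up; the method mapping follows the branches above):

import numpy as np
import pandas as pd
import scikit_posthocs as sp

# 3 groups -> 3 pairwise comparisons, 3 points per group -> 'sidak' according to the logic above.
method = choose_p_adjust_method(num_groups=3, num_data_points=3)

df = pd.DataFrame({
    'value': [1.2, 1.4, 1.1, 2.8, 3.0, 2.9, 5.1, 4.8, 5.3],
    'group': ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c'],
})

# posthoc_dunn returns a square DataFrame of pairwise adjusted p-values.
dunn = sp.posthoc_dunn(df, val_col='value', group_col='group', p_adjust=method)

# Walk the upper triangle to report each comparison once, as perform_posthoc_tests does.
for i, j in zip(*np.triu_indices_from(dunn.values, k=1)):
    print(f"{dunn.index[i]} vs {dunn.columns[j]}: p = {dunn.iloc[i, j]:.3g}")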
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: spacr
- Version: 0.3.50
+ Version: 0.3.52
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
  Home-page: https://github.com/EinarOlafsson/spacr
  Author: Einar Birnir Olafsson
@@ -16,6 +16,7 @@ Requires-Dist: scipy<2.0,>=1.12.0
  Requires-Dist: cellpose<4.0,>=3.0.6
  Requires-Dist: scikit-image<1.0,>=0.22.0
  Requires-Dist: scikit-learn<2.0,>=1.4.1
+ Requires-Dist: scikit-posthocs<0.20,>=0.10.0
  Requires-Dist: mahotas<2.0,>=1.4.13
  Requires-Dist: btrack<1.0,>=0.6.5
  Requires-Dist: trackpy<1.0,>=0.6.2
@@ -14,21 +14,21 @@ spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
  spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
  spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
  spacr/gui_elements.py,sha256=Dr9KEek41LggJ2z2zfh28a7w86sZXg1jzF388rF2BT4,138249
- spacr/gui_utils.py,sha256=KDWDWsi7UdZVhXk1ZWGx3ZqJMIxCUm3lGfjrVhbk52s,45463
+ spacr/gui_utils.py,sha256=aOVI2G71pObolEjuSgAKr86XJZtN98uqqBfCbucMhRQ,45560
  spacr/io.py,sha256=1rIdJ_8dyn7W4D2zXjaOqlgyo_Y5Z7X86aRp4hNYWCU,144194
  spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
- spacr/measure.py,sha256=KdboGXoi85BO5-_6er7932FgjFI7G7tuaQDnWSiEuew,54817
+ spacr/measure.py,sha256=euywVbBXRllht-frnxy9QHV76i8HkPPjm3dJw46kcz0,55085
  spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
  spacr/ml.py,sha256=Oykp3drBxZrcwrWQh2n6Xt1OzZER6pSIiaR-W0GO2_E,67353
  spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
- spacr/plot.py,sha256=Wy5ac-InIn0VCfHNm1-MzFncNZqsTs4tHDWWFRdPz3Y,163420
+ spacr/plot.py,sha256=B4nxp7NFyM6MmtarW9zbxydeEvVIMgPqHEElkCIycpA,169566
  spacr/sequencing.py,sha256=HDpF_C3hRd-fk6ZENPmI3vgYoom3HIvaeIIZWLhaIAY,25037
- spacr/settings.py,sha256=QXtnWbDlABezc3wQjV-jEJvJTfEupkK3WYyKTcHkghk,77710
+ spacr/settings.py,sha256=2Er3G6ApcemIEhyQCsF8zmVdBW8KiKf4nkSismu03s8,77358
  spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
  spacr/submodules.py,sha256=3C5M4UbI9Ral1MX4PTpucaAaqhL3RADuCOCqaHhMyUg,28048
  spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
- spacr/toxo.py,sha256=od_nHj3xSkqBnRqIy0Pr9rzBpexxuBMKLUrWKucNRpc,17055
- spacr/utils.py,sha256=PKePCs3BVpaSV0XWLEcXMntI1l4hWiajM3eorHdo8Z8,221417
+ spacr/toxo.py,sha256=bcH5Ug6xtf3BBxvOFc8e4Jc5R3f6-C46oHhgiSoYl3U,24935
+ spacr/utils.py,sha256=clrjlUOhY-LQH0bTIiNJ9VMBEU9qlRbTvoU7-yNLzsg,222475
  spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
  spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
  spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -151,9 +151,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
  spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
  spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
  spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
- spacr-0.3.50.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
- spacr-0.3.50.dist-info/METADATA,sha256=daGbAWScl6sfl9uYXfvBZ3R9FjG9uHmROss7eKpgZYk,5987
- spacr-0.3.50.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
- spacr-0.3.50.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
- spacr-0.3.50.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
- spacr-0.3.50.dist-info/RECORD,,
+ spacr-0.3.52.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+ spacr-0.3.52.dist-info/METADATA,sha256=I0OCX9zCPvfgnnGS2JYaV-vdNvirglSfO6IzX68s8uI,6032
+ spacr-0.3.52.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+ spacr-0.3.52.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+ spacr-0.3.52.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+ spacr-0.3.52.dist-info/RECORD,,