spacr 0.3.81__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/plot.py CHANGED
@@ -32,6 +32,7 @@ from IPython.display import Image as ipyimage
32
32
  import matplotlib.patches as patches
33
33
  from collections import defaultdict
34
34
  from matplotlib.gridspec import GridSpec
35
+ from matplotlib_venn import venn2
35
36
 
36
37
  #filter_dict={'cell':[(0,100000), (0, 65000)],'nucleus':[(3000,100000), (1500, 65000)],'pathogen':[(500,100000), (0, 65000)]}
37
38
  def plot_image_mask_overlay(
@@ -1381,11 +1382,11 @@ def _plot_recruitment(df, df_type, channel_of_interest, columns=[], figuresize=1
1381
1382
  axes[3].set_xlabel(f'pathogen {df_type}', fontsize=font)
1382
1383
  axes[3].set_ylabel(f'pathogen_channel_{channel_of_interest}_mean_intensity', fontsize=font)
1383
1384
 
1384
- axes[0].legend_.remove()
1385
- axes[1].legend_.remove()
1386
- axes[2].legend_.remove()
1387
- axes[3].legend_.remove()
1388
-
1385
+ #axes[0].legend_.remove()
1386
+ #axes[1].legend_.remove()
1387
+ #axes[2].legend_.remove()
1388
+ #axes[3].legend_.remove()
1389
+
1389
1390
  handles, labels = axes[3].get_legend_handles_labels()
1390
1391
  axes[3].legend(handles, labels, bbox_to_anchor=(1.05, 0.5), loc='center left')
1391
1392
  for i in [0,1,2,3]:
@@ -2043,7 +2044,9 @@ def plot_histogram(df, column, dst=None):
2043
2044
 
2044
2045
  plt.show()
2045
2046
 
2046
- def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count', remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4'], x_lim=[0.0,1], y_lim=[0,1], save=True):
2047
+ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
2048
+ remove_keys=None,
2049
+ x_lim=[0.0, 1], y_lim=[0, 1], remove_outliers=False, save=True):
2047
2050
 
2048
2051
  def lorenz_curve(data):
2049
2052
  """Calculate Lorenz curve."""
@@ -2053,34 +2056,64 @@ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
2053
2056
  lorenz_curve = np.insert(lorenz_curve, 0, 0)
2054
2057
  return lorenz_curve
2055
2058
 
2059
+ def gini_coefficient(data):
2060
+ """Calculate Gini coefficient from data."""
2061
+ sorted_data = np.sort(data)
2062
+ n = len(data)
2063
+ cumulative_data = np.cumsum(sorted_data) / np.sum(sorted_data)
2064
+ cumulative_data = np.insert(cumulative_data, 0, 0)
2065
+ gini = 1 - 2 * np.sum(cumulative_data[:-1] * np.diff(np.linspace(0, 1, n + 1)))
2066
+ return gini
2067
+
2068
+ def remove_outliers_by_wells(data, name_col, wells_col):
2069
+ """Remove outliers based on 95% confidence interval for well counts."""
2070
+ well_counts = data.groupby(name_col).size()
2071
+ q1 = well_counts.quantile(0.05)
2072
+ q3 = well_counts.quantile(0.95)
2073
+ iqr_range = q3 - q1
2074
+ lower_bound = q1 - 1.5 * iqr_range
2075
+ upper_bound = q3 + 1.5 * iqr_range
2076
+ valid_names = well_counts[(well_counts >= lower_bound) & (well_counts <= upper_bound)].index
2077
+ return data[data[name_col].isin(valid_names)]
2078
+
2056
2079
  combined_data = []
2080
+ gini_values = {}
2057
2081
 
2058
2082
  plt.figure(figsize=(10, 10))
2059
2083
 
2060
2084
  for idx, csv_file in enumerate(csv_files):
2061
- if idx == 1:
2062
- save_fldr = os.path.dirname(csv_file)
2063
- save_path = os.path.join(save_fldr, 'lorenz_curve.pdf')
2064
-
2065
2085
  df = pd.read_csv(csv_file)
2086
+
2087
+ # Remove specified keys
2066
2088
  for remove in remove_keys:
2067
2089
  df = df[df[name_column] != remove]
2068
2090
 
2091
+ # Remove outliers
2092
+ if remove_outliers:
2093
+ df = remove_outliers_by_wells(df, name_column, value_column)
2094
+
2069
2095
  values = df[value_column].values
2070
2096
  combined_data.extend(values)
2071
2097
 
2098
+ # Calculate Lorenz curve and Gini coefficient
2072
2099
  lorenz = lorenz_curve(values)
2073
- name = f"plate {idx+1}"
2100
+ gini = gini_coefficient(values)
2101
+ gini_values[f"plate {idx+1}"] = gini
2102
+
2103
+ name = f"plate {idx+1} (Gini: {gini:.4f})"
2074
2104
  plt.plot(np.linspace(0, 1, len(lorenz)), lorenz, label=name)
2075
2105
 
2076
2106
  # Plot combined Lorenz curve
2077
2107
  combined_lorenz = lorenz_curve(np.array(combined_data))
2078
- plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label="Combined", linestyle='--', color='black')
2108
+ combined_gini = gini_coefficient(np.array(combined_data))
2109
+ gini_values["Combined"] = combined_gini
2110
+
2111
+ plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label=f"Combined (Gini: {combined_gini:.4f})", linestyle='--', color='black')
2079
2112
 
2080
- if x_lim != None:
2113
+ if x_lim is not None:
2081
2114
  plt.xlim(x_lim)
2082
2115
 
2083
- if y_lim != None:
2116
+ if y_lim is not None:
2084
2117
  plt.ylim(y_lim)
2085
2118
 
2086
2119
  plt.title('Lorenz Curves')
@@ -2092,10 +2125,15 @@ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
2092
2125
  if save:
2093
2126
  save_path = os.path.join(os.path.dirname(csv_files[0]), 'results')
2094
2127
  os.makedirs(save_path, exist_ok=True)
2095
- save_file_path = os.path.join(save_path, 'lorenz_curve.pdf')
2128
+ save_file_path = os.path.join(save_path, 'lorenz_curve_with_gini.pdf')
2096
2129
  plt.savefig(save_file_path, format='pdf', bbox_inches='tight')
2097
2130
  print(f"Saved Lorenz Curve: {save_file_path}")
2098
- plt.show()
2131
+
2132
+ plt.show()
2133
+
2134
+ # Print Gini coefficients
2135
+ for plate, gini in gini_values.items():
2136
+ print(f"{plate}: Gini Coefficient = {gini:.4f}")
2099
2137
 
2100
2138
  def plot_permutation(permutation_df):
2101
2139
  num_features = len(permutation_df)
@@ -2484,21 +2522,79 @@ class spacrGraph:
2484
2522
  plt.show()
2485
2523
  return reordered_palette
2486
2524
 
2525
+ #def preprocess_data(self):
2526
+ # """Preprocess the data: remove NaNs, sort/order the grouping column, and optionally group by 'prc'."""
2527
+ # # Remove NaNs in both the grouping column and each data column
2528
+ # df = self.df.dropna(subset=[self.grouping_column] + self.data_column)
2529
+ # # Group by 'prc' column if representation is 'well'
2530
+ # if self.representation == 'well':
2531
+ # df = df.groupby(['prc', self.grouping_column])[self.data_column].agg(self.summary_func).reset_index()
2532
+ # if self.representation == 'plate':
2533
+ # df = df.groupby(['plate', self.grouping_column])[self.data_column].agg(self.summary_func).reset_index()
2534
+ # if self.order:
2535
+ # df[self.grouping_column] = pd.Categorical(df[self.grouping_column], categories=self.order, ordered=True)
2536
+ # else:
2537
+ # df[self.grouping_column] = pd.Categorical(df[self.grouping_column], categories=sorted(df[self.grouping_column].unique()), ordered=True)
2538
+ # return df
2539
+
2487
2540
  def preprocess_data(self):
2488
- """Preprocess the data: remove NaNs, sort/order the grouping column, and optionally group by 'prc'."""
2489
- # Remove NaNs in both the grouping column and each data column
2541
+ """
2542
+ Preprocess the data: remove NaNs, optionally ensure 'plate' column is created,
2543
+ then group by either 'prc', 'plate', or do no grouping at all if representation == 'object'.
2544
+ """
2545
+ # 1) Remove NaNs in both the grouping column and each data column
2490
2546
  df = self.df.dropna(subset=[self.grouping_column] + self.data_column)
2491
- # Group by 'prc' column if representation is 'well'
2492
- if self.representation == 'well':
2493
- df = df.groupby(['prc', self.grouping_column])[self.data_column].agg(self.summary_func).reset_index()
2494
- if self.representation == 'plate':
2495
- df = df.groupby(['plate', self.grouping_column])[self.data_column].agg(self.summary_func).reset_index()
2496
- if self.order:
2497
- df[self.grouping_column] = pd.Categorical(df[self.grouping_column], categories=self.order, ordered=True)
2547
+
2548
+ # 2) Decide how to handle grouping based on 'representation'
2549
+ if self.representation == 'object':
2550
+ # -- No grouping at all --
2551
+ # We do nothing except keep df as-is after removing NaNs
2552
+ group_cols = None
2553
+
2554
+ elif self.representation == 'well':
2555
+ # Group by ['prc', grouping_column]
2556
+ group_cols = ['prc', self.grouping_column]
2557
+
2558
+ elif self.representation == 'plate':
2559
+ # Make sure 'plate' exists (split from 'prc' if needed)
2560
+ if 'plate' not in df.columns:
2561
+ if 'prc' in df.columns:
2562
+ df[['plate', 'row', 'column']] = df['prc'].str.split('_', expand=True)
2563
+ else:
2564
+ raise KeyError(
2565
+ "Representation is 'plate', but no 'plate' column found. "
2566
+ "Also cannot split from 'prc' because 'prc' column is missing."
2567
+ )
2568
+ # If the grouping column IS 'plate', only group by ['plate'] once
2569
+ if self.grouping_column == 'plate':
2570
+ group_cols = ['plate']
2571
+ else:
2572
+ group_cols = ['plate', self.grouping_column]
2573
+
2498
2574
  else:
2499
- df[self.grouping_column] = pd.Categorical(df[self.grouping_column], categories=sorted(df[self.grouping_column].unique()), ordered=True)
2500
- return df
2575
+ raise ValueError(f"Unknown representation: {self.representation}")
2576
+
2577
+ # 3) Perform grouping only if group_cols is set
2578
+ if group_cols is not None:
2579
+ df = df.groupby(group_cols)[self.data_column].agg(self.summary_func).reset_index()
2580
+
2581
+ # 4) Handle ordering if specified (and if the grouping_column still exists)
2582
+ if self.order and (self.grouping_column in df.columns):
2583
+ df[self.grouping_column] = pd.Categorical(
2584
+ df[self.grouping_column],
2585
+ categories=self.order,
2586
+ ordered=True
2587
+ )
2588
+ elif (self.grouping_column in df.columns):
2589
+ # Default to sorting unique values
2590
+ df[self.grouping_column] = pd.Categorical(
2591
+ df[self.grouping_column],
2592
+ categories=sorted(df[self.grouping_column].unique()),
2593
+ ordered=True
2594
+ )
2501
2595
 
2596
+ return df
2597
+
2502
2598
  def remove_outliers_from_plot(self):
2503
2599
  """Remove outliers from the plot but keep them in the data."""
2504
2600
  filtered_df = self.df.copy()
@@ -2900,6 +2996,11 @@ class spacrGraph:
2900
2996
 
2901
2997
  # Set figure size to ensure it remains square with a minimum size
2902
2998
  fig_size = max(6, num_groups * 2) / correction_factor
2999
+
3000
+ if fig_size < 10:
3001
+ fig_size = 10
3002
+
3003
+
2903
3004
  ax.figure.set_size_inches(fig_size, fig_size)
2904
3005
 
2905
3006
  # Configure layout based on the number of groups
@@ -2948,66 +3049,6 @@ class spacrGraph:
2948
3049
  # Redraw the figure to apply changes
2949
3050
  ax.figure.canvas.draw()
2950
3051
 
2951
- def _standerdize_figure_format_v1(self, ax, num_groups, graph_type):
2952
- """
2953
- Adjusts the figure layout (size, bar width, jitter, and spacing) based on the number of groups.
2954
- """
2955
- if graph_type in ['line', 'line_std']:
2956
- print("Skipping layout adjustment for line graphs.")
2957
- return # Skip layout adjustment for line graphs
2958
-
2959
- correction_factor = 4
2960
-
2961
- # Set figure size to ensure it remains square with a minimum size
2962
- fig_size = max(6, num_groups * 2) / correction_factor
2963
- ax.figure.set_size_inches(fig_size, fig_size)
2964
-
2965
- # Configure layout based on the number of groups
2966
- bar_width = min(0.8, 1.5 / num_groups) / correction_factor
2967
- jitter_amount = min(0.1, 0.2 / num_groups) / correction_factor
2968
- jitter_size = max(50 / num_groups, 200)
2969
-
2970
- # Adjust x-axis limits to fit the specified order of groups
2971
- ax.set_xlim(-0.5, len(self.order) - 0.5) # Use `self.order` length to ensure alignment
2972
-
2973
- # Use `self.order` as the x-tick labels to maintain consistent ordering
2974
- ax.set_xticks(range(len(self.order)))
2975
- #ax.set_xticklabels(self.order, rotation=45, ha='right')
2976
- plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
2977
-
2978
- # Customize elements based on the graph type
2979
- if graph_type == 'bar':
2980
- # Adjust bars' width and position
2981
- for bar in ax.patches:
2982
- bar.set_width(bar_width)
2983
- bar.set_x(bar.get_x() - bar_width / 2)
2984
-
2985
- elif graph_type in ['jitter', 'jitter_bar', 'jitter_box']:
2986
- # Adjust jitter points' position and size
2987
- for coll in ax.collections:
2988
- offsets = coll.get_offsets()
2989
- offsets[:, 0] += jitter_amount # Shift jitter points slightly
2990
- coll.set_offsets(offsets)
2991
- coll.set_sizes([jitter_size] * len(offsets)) # Adjust point size dynamically
2992
-
2993
- elif graph_type in ['box', 'violin']:
2994
- # Adjust box width for consistent spacing
2995
- for artist in ax.artists:
2996
- artist.set_width(bar_width)
2997
-
2998
- # Adjust legend and axis labels
2999
- ax.tick_params(axis='x', labelsize=max(10, 15 - num_groups // 2))
3000
- ax.tick_params(axis='y', labelsize=max(10, 15 - num_groups // 2))
3001
-
3002
- # Adjust legend placement and size
3003
- if ax.get_legend():
3004
- ax.get_legend().set_bbox_to_anchor((1.05, 1))
3005
- ax.get_legend().prop.set_size(max(8, 12 - num_groups // 3))
3006
-
3007
- # Redraw the figure to apply changes
3008
- ax.figure.canvas.draw()
3009
-
3010
-
3011
3052
  def _create_bar_plot(self, ax):
3012
3053
  """Helper method to create a bar plot with consistent bar thickness and centered error bars."""
3013
3054
  # Flatten DataFrame: Combine grouping column and data column into one group if needed
@@ -3328,7 +3369,7 @@ def plot_data_from_db(settings):
3328
3369
  [df1] = _read_db(db_loc, tables=[settings['table_names']])
3329
3370
  else:
3330
3371
  df1, _ = _read_and_merge_data(locs=[db_loc],
3331
- tables = settings['tables'],
3372
+ tables = settings['table_names'],
3332
3373
  verbose=settings['verbose'],
3333
3374
  nuclei_limit=settings['nuclei_limit'],
3334
3375
  pathogen_limit=settings['pathogen_limit'])
@@ -3355,6 +3396,13 @@ def plot_data_from_db(settings):
3355
3396
  df = df.dropna(subset='treatment')
3356
3397
 
3357
3398
  df = df.dropna(subset=settings['data_column'])
3399
+
3400
+ if settings['grouping_column'] not in df.columns:
3401
+ print(f"Grouping column {settings['grouping_column']} not found in DataFrame.")
3402
+ print(f'Please use one of the following columns: {df.columns}')
3403
+ display(df)
3404
+ return None
3405
+
3358
3406
  df = df.dropna(subset=settings['grouping_column'])
3359
3407
 
3360
3408
  src = srcs[0]
@@ -3410,8 +3458,6 @@ def plot_data_from_csv(settings):
3410
3458
  else:
3411
3459
  raise ValueError("src must be a string or a list of strings.")
3412
3460
 
3413
- #save_settings(settings, name=f"{settings['graph_name']}_plot_settings_csv", show=True)
3414
-
3415
3461
  dfs = []
3416
3462
  for i, src in enumerate(srcs):
3417
3463
  dft = pd.read_csv(src)
@@ -3421,7 +3467,17 @@ def plot_data_from_csv(settings):
3421
3467
  dfs.append(dft)
3422
3468
 
3423
3469
  df = pd.concat(dfs, axis=0)
3470
+
3471
+ if 'prc' in df.columns:
3472
+ # Check if 'plate', 'row', and 'column' are all missing from df.columns
3473
+ if not all(col in df.columns for col in ['plate', 'row_name', 'column_name']):
3474
+ try:
3475
+ # Split 'prc' into 'plate', 'row', and 'column'
3476
+ df[['plate', 'row_name', 'column_name']] = df['prc'].str.split('_', expand=True)
3477
+ except Exception as e:
3478
+ print(f"Could not split the prc column: {e}")
3424
3479
 
3480
+
3425
3481
  display(df)
3426
3482
 
3427
3483
  df = df.dropna(subset=settings['data_column'])
@@ -3812,3 +3868,58 @@ def plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_col
3812
3868
  })
3813
3869
 
3814
3870
  return results_df, pairwise_results, fig
3871
+
3872
+ def create_venn_diagram(file1, file2, gene_column="gene", filter_coeff=0.1, save=True, save_path=None):
3873
+ """
3874
+ Reads two CSV files, extracts the `gene` column, and creates a Venn diagram
3875
+ to show overlapping and non-overlapping genes.
3876
+
3877
+ Parameters:
3878
+ file1 (str): Path to the first CSV file.
3879
+ file2 (str): Path to the second CSV file.
3880
+ gene_column (str): Name of the column containing gene data (default: "gene").
3881
+ filter_coeff (float): Coefficient threshold for filtering genes.
3882
+ save (bool): Whether to save the plot.
3883
+ save_path (str): Path to save the Venn diagram figure.
3884
+
3885
+ Returns:
3886
+ dict: Overlapping and non-overlapping genes.
3887
+ """
3888
+ # Read CSV files
3889
+ df1 = pd.read_csv(file1)
3890
+ df2 = pd.read_csv(file2)
3891
+
3892
+ # Filter based on coefficient
3893
+ if filter_coeff is not None:
3894
+ df1 = df1[df1['coefficient'] > filter_coeff] if filter_coeff >= 0 else df1[df1['coefficient'] < filter_coeff]
3895
+ df2 = df2[df2['coefficient'] > filter_coeff] if filter_coeff >= 0 else df2[df2['coefficient'] < filter_coeff]
3896
+
3897
+ # Extract gene columns and drop NaN values
3898
+ genes1 = set(df1[gene_column].dropna())
3899
+ genes2 = set(df2[gene_column].dropna())
3900
+
3901
+ # Calculate overlapping and non-overlapping genes
3902
+ overlapping_genes = genes1.intersection(genes2)
3903
+ unique_to_file1 = genes1.difference(genes2)
3904
+ unique_to_file2 = genes2.difference(genes1)
3905
+
3906
+ # Create a Venn diagram
3907
+ plt.figure(figsize=(8, 6))
3908
+ venn = venn2([genes1, genes2], ('File 1 Genes', 'File 2 Genes'))
3909
+ plt.title("Venn Diagram of Overlapping Genes")
3910
+
3911
+ # Save or show the figure
3912
+ if save:
3913
+ if save_path is None:
3914
+ raise ValueError("save_path must be provided when save=True.")
3915
+ plt.savefig(save_path, dpi=300, bbox_inches="tight", format='pdf')
3916
+ print(f"Venn diagram saved to {save_path}")
3917
+ else:
3918
+ plt.show()
3919
+
3920
+ # Return the results
3921
+ return {
3922
+ "overlap": list(overlapping_genes),
3923
+ "unique_to_file1": list(unique_to_file1),
3924
+ "unique_to_file2": list(unique_to_file2)
3925
+ }
spacr/settings.py CHANGED
@@ -24,15 +24,10 @@ def set_default_plot_merge_settings():
24
24
  settings.setdefault('verbose', True)
25
25
  return settings
26
26
 
27
- def set_default_settings_preprocess_generate_masks(src, settings={}):
28
- # Main settings
29
- if src != None:
30
- settings['src'] = src
31
- else:
32
- settings.setdefault('src', 'path')
33
- if 'src' not in settings:
34
- settings['src'] = 'path'
35
-
27
+ def set_default_settings_preprocess_generate_masks(settings={}):
28
+
29
+ settings.setdefault('src', 'path')
30
+ settings.setdefault('delete_intermediate', False)
36
31
  settings.setdefault('segmentation_mode', 'cellpose')
37
32
  settings.setdefault('preprocess', True)
38
33
  settings.setdefault('masks', True)
@@ -49,6 +44,10 @@ def set_default_settings_preprocess_generate_masks(src, settings={}):
49
44
  settings.setdefault('remove_background_cell', False)
50
45
  settings.setdefault('remove_background_nucleus', False)
51
46
  settings.setdefault('remove_background_pathogen', False)
47
+
48
+ settings.setdefault('cell_diamiter', None)
49
+ settings.setdefault('nucleus_diamiter', None)
50
+ settings.setdefault('pathogen_diamiter', None)
52
51
 
53
52
  # Channel settings
54
53
  settings.setdefault('cell_channel', None)
@@ -147,12 +146,27 @@ def _get_object_settings(object_type, settings):
147
146
  object_settings['filter_size'] = False
148
147
  object_settings['filter_intensity'] = False
149
148
  object_settings['restore_type'] = settings.get('cell_restore_type', None)
149
+ if settings['cell_diamiter'] is not None:
150
+ if isinstance(settings['cell_diamiter'], (int, float)):
151
+ object_settings['diameter'] = settings['cell_diamiter']
152
+ object_settings['minimum_size'] = (object_settings['diameter']**2)/4
153
+ object_settings['maximum_size'] = (object_settings['diameter']**2)*10
154
+ else:
155
+ print(f'Cell diameter must be an integer or float, got {settings["cell_diamiter"]}')
150
156
 
151
157
  elif object_type == 'nucleus':
152
158
  object_settings['model_name'] = 'nuclei'
153
159
  object_settings['filter_size'] = False
154
160
  object_settings['filter_intensity'] = False
155
161
  object_settings['restore_type'] = settings.get('nucleus_restore_type', None)
162
+
163
+ if settings['nucleus_diamiter'] is not None:
164
+ if isinstance(settings['nucleus_diamiter'], (int, float)):
165
+ object_settings['diameter'] = settings['nucleus_diamiter']
166
+ object_settings['minimum_size'] = (object_settings['diameter']**2)/4
167
+ object_settings['maximum_size'] = (object_settings['diameter']**2)*10
168
+ else:
169
+ print(f'Nucleus diameter must be an integer or float, got {settings["nucleus_diamiter"]}')
156
170
 
157
171
  elif object_type == 'pathogen':
158
172
  object_settings['model_name'] = 'cyto'
@@ -162,6 +176,14 @@ def _get_object_settings(object_type, settings):
162
176
  object_settings['restore_type'] = settings.get('pathogen_restore_type', None)
163
177
  object_settings['merge'] = settings['merge_pathogens']
164
178
 
179
+ if settings['pathogen_diamiter'] is not None:
180
+ if isinstance(settings['pathogen_diamiter'], (int, float)):
181
+ object_settings['diameter'] = settings['pathogen_diamiter']
182
+ object_settings['minimum_size'] = (object_settings['diameter']**2)/4
183
+ object_settings['maximum_size'] = (object_settings['diameter']**2)*10
184
+ else:
185
+ print(f'Pathogen diameter must be an integer or float, got {settings["pathogen_diamiter"]}')
186
+
165
187
  else:
166
188
  print(f'Object type: {object_type} not supported. Supported object types are : cell, nucleus and pathogen')
167
189
 
@@ -216,6 +238,8 @@ def set_default_umap_image_settings(settings={}):
216
238
  def get_measure_crop_settings(settings={}):
217
239
 
218
240
  settings.setdefault('src', 'path')
241
+ settings.setdefault('delete_intermediate', False)
242
+
219
243
  settings.setdefault('verbose', False)
220
244
  settings.setdefault('experiment', 'exp')
221
245
 
@@ -339,7 +363,7 @@ def set_default_train_test_model(settings):
339
363
  def set_generate_training_dataset_defaults(settings):
340
364
 
341
365
  settings.setdefault('src','path')
342
- settings.setdefault('tables',['cell', 'nucleus', 'pathogen', 'cytoplasm'])
366
+ settings.setdefault('tables', ['cell', 'nucleus', 'pathogen', 'cytoplasm'])
343
367
  settings.setdefault('dataset_mode','metadata')
344
368
  settings.setdefault('annotation_column','test')
345
369
  settings.setdefault('annotated_classes',[1,2])
@@ -457,7 +481,7 @@ def get_analyze_recruitment_default_settings(settings):
457
481
  settings.setdefault('pathogen_plate_metadata',[['c1', 'c2', 'c3'],['c4','c5', 'c6']])
458
482
  settings.setdefault('treatments',['cm', 'lovastatin'])
459
483
  settings.setdefault('treatment_plate_metadata',[['r1', 'r2','r3'], ['r4', 'r5','r6']])
460
- settings.setdefault('metadata_types',['column_name', 'column_name', 'row_name'])
484
+ #settings.setdefault('metadata_types',['column_name', 'column_name', 'row_name'])
461
485
  settings.setdefault('channel_dims',[0,1,2,3])
462
486
  settings.setdefault('cell_chann_dim',3)
463
487
  settings.setdefault('cell_mask_dim',4)
@@ -545,6 +569,7 @@ def get_perform_regression_default_settings(settings):
545
569
  settings.setdefault('log_x',False)
546
570
  settings.setdefault('log_y',False)
547
571
  settings.setdefault('x_lim',None)
572
+ settings.setdefault('outlier_detection',True)
548
573
  settings.setdefault('agg_type','mean')
549
574
  settings.setdefault('min_cell_count',None)
550
575
  settings.setdefault('regression_type','ols')
@@ -908,17 +933,25 @@ expected_types = {
908
933
  "offset_start":int,
909
934
  "chunk_size":int,
910
935
  "single_direction":str,
936
+ "delete_intermediate":bool,
937
+ "outlier_detection":bool,
938
+ "CP_prob":int,
939
+ "diameter":int,
940
+ "flow_threshold":float,
941
+ "cell_diamiter":int,
942
+ "nucleus_diamiter":int,
943
+ "pathogen_diamiter":int
911
944
  }
912
945
 
913
946
  categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset","model_path","grna_csv","row_csv","column_csv", "metadata_files", "score_data","count_data"],
914
- "General": ["metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode"],
947
+ "General": ["metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode", "delete_intermediate"],
915
948
  "Cellpose":["fill_in","from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "invert", "diameter", "grayscale", "Signal_to_noise", "resize", "target_height", "target_width"],
916
- "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "uninfected", "merge_edge_pathogen_cells", "adjust_cells", "cells", "cell_loc"],
917
- "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
918
- "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
949
+ "Cell": ["cell_diamiter","cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "uninfected", "merge_edge_pathogen_cells", "adjust_cells", "cells", "cell_loc"],
950
+ "Nucleus": ["nucleus_diamiter","nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
951
+ "Pathogen": ["pathogen_diamiter","pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
919
952
  "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
920
953
  "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "normalize", "use_bounding_box"],
921
- "Sequencing": ["offset_start","chunk_size","single_direction", "signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
954
+ "Sequencing": ["outlier_detection","offset_start","chunk_size","single_direction", "signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
922
955
  "Generate Dataset":["save_to_db","file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
923
956
  "Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
924
957
  "Hyperparamiters (Embedding)": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
@@ -1032,6 +1065,9 @@ def generate_fields(variables, scrollable_frame):
1032
1065
  row = 1
1033
1066
  vars_dict = {}
1034
1067
  tooltips = {
1068
+ "cell_diamiter": "(int) - Diameter for cellpose objects to segment.",
1069
+ "nucleus_diamiter": "(int) - Diameter for cellpose objects to segment.",
1070
+ "pathogen_diamiter": "(int) - Diameter for cellpose objects to segment.",
1035
1071
  "adjust_cells": "(bool) - Adjust cell parameters for better segmentation.",
1036
1072
  "agg_type": "(str) - Type of aggregation to use for the data.",
1037
1073
  "alpha": "(float) - Alpha parameter for the regression model.",