spacr 0.3.81__py3-none-any.whl → 0.4.1__py3-none-any.whl
- spacr/__init__.py +2 -6
- spacr/core.py +27 -13
- spacr/deep_spacr.py +285 -5
- spacr/gui_core.py +69 -38
- spacr/gui_elements.py +193 -3
- spacr/gui_utils.py +1 -1
- spacr/io.py +5 -176
- spacr/measure.py +10 -6
- spacr/ml.py +369 -46
- spacr/plot.py +203 -92
- spacr/settings.py +53 -17
- spacr/sp_stats.py +221 -0
- spacr/submodules.py +283 -2
- spacr/toxo.py +98 -75
- spacr/utils.py +144 -52
- {spacr-0.3.81.dist-info → spacr-0.4.1.dist-info}/METADATA +2 -1
- {spacr-0.3.81.dist-info → spacr-0.4.1.dist-info}/RECORD +21 -20
- {spacr-0.3.81.dist-info → spacr-0.4.1.dist-info}/LICENSE +0 -0
- {spacr-0.3.81.dist-info → spacr-0.4.1.dist-info}/WHEEL +0 -0
- {spacr-0.3.81.dist-info → spacr-0.4.1.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.81.dist-info → spacr-0.4.1.dist-info}/top_level.txt +0 -0
spacr/plot.py
CHANGED
@@ -32,6 +32,7 @@ from IPython.display import Image as ipyimage
 import matplotlib.patches as patches
 from collections import defaultdict
 from matplotlib.gridspec import GridSpec
+from matplotlib_venn import venn2
 
 #filter_dict={'cell':[(0,100000), (0, 65000)],'nucleus':[(3000,100000), (1500, 65000)],'pathogen':[(500,100000), (0, 65000)]}
 def plot_image_mask_overlay(
@@ -1381,11 +1382,11 @@ def _plot_recruitment(df, df_type, channel_of_interest, columns=[], figuresize=1
     axes[3].set_xlabel(f'pathogen {df_type}', fontsize=font)
     axes[3].set_ylabel(f'pathogen_channel_{channel_of_interest}_mean_intensity', fontsize=font)
 
-    axes[0].legend_.remove()
-    axes[1].legend_.remove()
-    axes[2].legend_.remove()
-    axes[3].legend_.remove()
-
+    #axes[0].legend_.remove()
+    #axes[1].legend_.remove()
+    #axes[2].legend_.remove()
+    #axes[3].legend_.remove()
+
     handles, labels = axes[3].get_legend_handles_labels()
     axes[3].legend(handles, labels, bbox_to_anchor=(1.05, 0.5), loc='center left')
     for i in [0,1,2,3]:
@@ -2043,7 +2044,9 @@ def plot_histogram(df, column, dst=None):
 
     plt.show()
 
-def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
+def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
+                       remove_keys=None,
+                       x_lim=[0.0, 1], y_lim=[0, 1], remove_outliers=False, save=True):
 
     def lorenz_curve(data):
         """Calculate Lorenz curve."""
@@ -2053,34 +2056,64 @@ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
         lorenz_curve = np.insert(lorenz_curve, 0, 0)
         return lorenz_curve
 
+    def gini_coefficient(data):
+        """Calculate Gini coefficient from data."""
+        sorted_data = np.sort(data)
+        n = len(data)
+        cumulative_data = np.cumsum(sorted_data) / np.sum(sorted_data)
+        cumulative_data = np.insert(cumulative_data, 0, 0)
+        gini = 1 - 2 * np.sum(cumulative_data[:-1] * np.diff(np.linspace(0, 1, n + 1)))
+        return gini
+
+    def remove_outliers_by_wells(data, name_col, wells_col):
+        """Remove outliers based on 95% confidence interval for well counts."""
+        well_counts = data.groupby(name_col).size()
+        q1 = well_counts.quantile(0.05)
+        q3 = well_counts.quantile(0.95)
+        iqr_range = q3 - q1
+        lower_bound = q1 - 1.5 * iqr_range
+        upper_bound = q3 + 1.5 * iqr_range
+        valid_names = well_counts[(well_counts >= lower_bound) & (well_counts <= upper_bound)].index
+        return data[data[name_col].isin(valid_names)]
+
     combined_data = []
+    gini_values = {}
 
     plt.figure(figsize=(10, 10))
 
     for idx, csv_file in enumerate(csv_files):
-        if idx == 1:
-            save_fldr = os.path.dirname(csv_file)
-            save_path = os.path.join(save_fldr, 'lorenz_curve.pdf')
-
         df = pd.read_csv(csv_file)
+
+        # Remove specified keys
         for remove in remove_keys:
            df = df[df[name_column] != remove]
 
+        # Remove outliers
+        if remove_outliers:
+            df = remove_outliers_by_wells(df, name_column, value_column)
+
        values = df[value_column].values
        combined_data.extend(values)
 
+        # Calculate Lorenz curve and Gini coefficient
        lorenz = lorenz_curve(values)
-
+        gini = gini_coefficient(values)
+        gini_values[f"plate {idx+1}"] = gini
+
+        name = f"plate {idx+1} (Gini: {gini:.4f})"
        plt.plot(np.linspace(0, 1, len(lorenz)), lorenz, label=name)
 
    # Plot combined Lorenz curve
    combined_lorenz = lorenz_curve(np.array(combined_data))
-
+    combined_gini = gini_coefficient(np.array(combined_data))
+    gini_values["Combined"] = combined_gini
+
+    plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label=f"Combined (Gini: {combined_gini:.4f})", linestyle='--', color='black')
 
-    if x_lim
+    if x_lim is not None:
        plt.xlim(x_lim)
 
-    if y_lim
+    if y_lim is not None:
        plt.ylim(y_lim)
 
    plt.title('Lorenz Curves')
@@ -2092,10 +2125,15 @@ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
     if save:
         save_path = os.path.join(os.path.dirname(csv_files[0]), 'results')
         os.makedirs(save_path, exist_ok=True)
-        save_file_path = os.path.join(save_path, '
+        save_file_path = os.path.join(save_path, 'lorenz_curve_with_gini.pdf')
         plt.savefig(save_file_path, format='pdf', bbox_inches='tight')
         print(f"Saved Lorenz Curve: {save_file_path}")
-
+
+    plt.show()
+
+    # Print Gini coefficients
+    for plate, gini in gini_values.items():
+        print(f"{plate}: Gini Coefficient = {gini:.4f}")
 
 def plot_permutation(permutation_df):
     num_features = len(permutation_df)
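Note: the reworked plot_lorenz_curves now reports a Gini coefficient per plate (one minus twice the area under the Lorenz curve, approximated by a left Riemann sum over the sorted cumulative counts) and prints the values after plotting. A minimal usage sketch, assuming hypothetical CSV paths and noting that remove_keys should be passed as an iterable because the function loops over it directly:

    from spacr.plot import plot_lorenz_curves

    # Hypothetical count files; each needs the name/value columns referenced below.
    csv_files = ["/data/plate1/grna_counts.csv", "/data/plate2/grna_counts.csv"]

    plot_lorenz_curves(
        csv_files,
        name_column="grna_name",   # gRNA identifier column
        value_column="count",      # read-count column
        remove_keys=[],            # names to drop before plotting; pass a list, not None
        remove_outliers=True,      # apply the new 5th-95th percentile fence on per-name counts
        save=True,                 # writes <dir of first csv>/results/lorenz_curve_with_gini.pdf
    )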
@@ -2484,21 +2522,79 @@ class spacrGraph:
         plt.show()
         return reordered_palette
 
+    #def preprocess_data(self):
+    #    """Preprocess the data: remove NaNs, sort/order the grouping column, and optionally group by 'prc'."""
+    #    # Remove NaNs in both the grouping column and each data column
+    #    df = self.df.dropna(subset=[self.grouping_column] + self.data_column)
+    #    # Group by 'prc' column if representation is 'well'
+    #    if self.representation == 'well':
+    #        df = df.groupby(['prc', self.grouping_column])[self.data_column].agg(self.summary_func).reset_index()
+    #    if self.representation == 'plate':
+    #        df = df.groupby(['plate', self.grouping_column])[self.data_column].agg(self.summary_func).reset_index()
+    #    if self.order:
+    #        df[self.grouping_column] = pd.Categorical(df[self.grouping_column], categories=self.order, ordered=True)
+    #    else:
+    #        df[self.grouping_column] = pd.Categorical(df[self.grouping_column], categories=sorted(df[self.grouping_column].unique()), ordered=True)
+    #    return df
+
     def preprocess_data(self):
-        """
-
+        """
+        Preprocess the data: remove NaNs, optionally ensure 'plate' column is created,
+        then group by either 'prc', 'plate', or do no grouping at all if representation == 'object'.
+        """
+        # 1) Remove NaNs in both the grouping column and each data column
         df = self.df.dropna(subset=[self.grouping_column] + self.data_column)
-
-
-
-
-
-
-
+
+        # 2) Decide how to handle grouping based on 'representation'
+        if self.representation == 'object':
+            # -- No grouping at all --
+            # We do nothing except keep df as-is after removing NaNs
+            group_cols = None
+
+        elif self.representation == 'well':
+            # Group by ['prc', grouping_column]
+            group_cols = ['prc', self.grouping_column]
+
+        elif self.representation == 'plate':
+            # Make sure 'plate' exists (split from 'prc' if needed)
+            if 'plate' not in df.columns:
+                if 'prc' in df.columns:
+                    df[['plate', 'row', 'column']] = df['prc'].str.split('_', expand=True)
+                else:
+                    raise KeyError(
+                        "Representation is 'plate', but no 'plate' column found. "
+                        "Also cannot split from 'prc' because 'prc' column is missing."
+                    )
+            # If the grouping column IS 'plate', only group by ['plate'] once
+            if self.grouping_column == 'plate':
+                group_cols = ['plate']
+            else:
+                group_cols = ['plate', self.grouping_column]
+
         else:
-
-
+            raise ValueError(f"Unknown representation: {self.representation}")
+
+        # 3) Perform grouping only if group_cols is set
+        if group_cols is not None:
+            df = df.groupby(group_cols)[self.data_column].agg(self.summary_func).reset_index()
+
+        # 4) Handle ordering if specified (and if the grouping_column still exists)
+        if self.order and (self.grouping_column in df.columns):
+            df[self.grouping_column] = pd.Categorical(
+                df[self.grouping_column],
+                categories=self.order,
+                ordered=True
+            )
+        elif (self.grouping_column in df.columns):
+            # Default to sorting unique values
+            df[self.grouping_column] = pd.Categorical(
+                df[self.grouping_column],
+                categories=sorted(df[self.grouping_column].unique()),
+                ordered=True
+            )
 
+        return df
+
     def remove_outliers_from_plot(self):
         """Remove outliers from the plot but keep them in the data."""
         filtered_df = self.df.copy()
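Note: the rewritten preprocess_data branches on self.representation: 'object' keeps rows ungrouped, 'well' groups by ['prc', grouping_column], and 'plate' groups by plate, deriving a 'plate' column by splitting 'prc' on underscores when it is missing. A small self-contained sketch of that split, using illustrative 'prc' values of the form plate_row_column:

    import pandas as pd

    # Illustrative well identifiers; real values come from the spacr measurement tables.
    df = pd.DataFrame({"prc": ["plate1_r1_c1", "plate1_r2_c3"], "score": [0.4, 0.7]})

    # Same operation used when representation == 'plate' and no 'plate' column exists.
    df[["plate", "row", "column"]] = df["prc"].str.split("_", expand=True)
    print(df)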
@@ -2609,7 +2705,7 @@ class spacrGraph:
     def perform_posthoc_tests(self, is_normal, unique_groups):
         """Perform post-hoc tests for multiple groups based on all_to_all flag."""
 
-        from .
+        from .sp_stats import choose_p_adjust_method
 
         posthoc_results = []
         if is_normal and len(unique_groups) > 2 and self.all_to_all:
@@ -2900,6 +2996,11 @@ class spacrGraph:
 
         # Set figure size to ensure it remains square with a minimum size
         fig_size = max(6, num_groups * 2) / correction_factor
+
+        if fig_size < 10:
+            fig_size = 10
+
+
         ax.figure.set_size_inches(fig_size, fig_size)
 
         # Configure layout based on the number of groups
@@ -2948,66 +3049,6 @@ class spacrGraph:
         # Redraw the figure to apply changes
         ax.figure.canvas.draw()
 
-    def _standerdize_figure_format_v1(self, ax, num_groups, graph_type):
-        """
-        Adjusts the figure layout (size, bar width, jitter, and spacing) based on the number of groups.
-        """
-        if graph_type in ['line', 'line_std']:
-            print("Skipping layout adjustment for line graphs.")
-            return  # Skip layout adjustment for line graphs
-
-        correction_factor = 4
-
-        # Set figure size to ensure it remains square with a minimum size
-        fig_size = max(6, num_groups * 2) / correction_factor
-        ax.figure.set_size_inches(fig_size, fig_size)
-
-        # Configure layout based on the number of groups
-        bar_width = min(0.8, 1.5 / num_groups) / correction_factor
-        jitter_amount = min(0.1, 0.2 / num_groups) / correction_factor
-        jitter_size = max(50 / num_groups, 200)
-
-        # Adjust x-axis limits to fit the specified order of groups
-        ax.set_xlim(-0.5, len(self.order) - 0.5)  # Use `self.order` length to ensure alignment
-
-        # Use `self.order` as the x-tick labels to maintain consistent ordering
-        ax.set_xticks(range(len(self.order)))
-        #ax.set_xticklabels(self.order, rotation=45, ha='right')
-        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
-
-        # Customize elements based on the graph type
-        if graph_type == 'bar':
-            # Adjust bars' width and position
-            for bar in ax.patches:
-                bar.set_width(bar_width)
-                bar.set_x(bar.get_x() - bar_width / 2)
-
-        elif graph_type in ['jitter', 'jitter_bar', 'jitter_box']:
-            # Adjust jitter points' position and size
-            for coll in ax.collections:
-                offsets = coll.get_offsets()
-                offsets[:, 0] += jitter_amount  # Shift jitter points slightly
-                coll.set_offsets(offsets)
-                coll.set_sizes([jitter_size] * len(offsets))  # Adjust point size dynamically
-
-        elif graph_type in ['box', 'violin']:
-            # Adjust box width for consistent spacing
-            for artist in ax.artists:
-                artist.set_width(bar_width)
-
-        # Adjust legend and axis labels
-        ax.tick_params(axis='x', labelsize=max(10, 15 - num_groups // 2))
-        ax.tick_params(axis='y', labelsize=max(10, 15 - num_groups // 2))
-
-        # Adjust legend placement and size
-        if ax.get_legend():
-            ax.get_legend().set_bbox_to_anchor((1.05, 1))
-            ax.get_legend().prop.set_size(max(8, 12 - num_groups // 3))
-
-        # Redraw the figure to apply changes
-        ax.figure.canvas.draw()
-
-
     def _create_bar_plot(self, ax):
         """Helper method to create a bar plot with consistent bar thickness and centered error bars."""
         # Flatten DataFrame: Combine grouping column and data column into one group if needed
@@ -3328,7 +3369,7 @@ def plot_data_from_db(settings):
         [df1] = _read_db(db_loc, tables=[settings['table_names']])
     else:
         df1, _ = _read_and_merge_data(locs=[db_loc],
-                                      tables = settings['
+                                      tables = settings['table_names'],
                                       verbose=settings['verbose'],
                                       nuclei_limit=settings['nuclei_limit'],
                                       pathogen_limit=settings['pathogen_limit'])
@@ -3355,6 +3396,13 @@ def plot_data_from_db(settings):
     df = df.dropna(subset='treatment')
 
     df = df.dropna(subset=settings['data_column'])
+
+    if settings['grouping_column'] not in df.columns:
+        print(f"Grouping column {settings['grouping_column']} not found in DataFrame.")
+        print(f'Please use one of the following columns: {df.columns}')
+        display(df)
+        return None
+
     df = df.dropna(subset=settings['grouping_column'])
 
     src = srcs[0]
@@ -3410,8 +3458,6 @@ def plot_data_from_csv(settings):
     else:
         raise ValueError("src must be a string or a list of strings.")
 
-    #save_settings(settings, name=f"{settings['graph_name']}_plot_settings_csv", show=True)
-
     dfs = []
     for i, src in enumerate(srcs):
         dft = pd.read_csv(src)
@@ -3421,7 +3467,17 @@ def plot_data_from_csv(settings):
         dfs.append(dft)
 
     df = pd.concat(dfs, axis=0)
+
+    if 'prc' in df.columns:
+        # Check if 'plate', 'row', and 'column' are all missing from df.columns
+        if not all(col in df.columns for col in ['plate', 'row_name', 'column_name']):
+            try:
+                # Split 'prc' into 'plate', 'row', and 'column'
+                df[['plate', 'row_name', 'column_name']] = df['prc'].str.split('_', expand=True)
+            except Exception as e:
+                print(f"Could not split the prc column: {e}")
 
+
     display(df)
 
     df = df.dropna(subset=settings['data_column'])
@@ -3759,7 +3815,7 @@ def plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_col
         - pairwise_results (list): Pairwise test results from `chi_pairwise`.
     """
 
-    from .
+    from .sp_stats import chi_pairwise
 
     # Calculate contingency table for overall chi-squared test
     raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
@@ -3812,3 +3868,58 @@ def plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_col
     })
 
     return results_df, pairwise_results, fig
+
+def create_venn_diagram(file1, file2, gene_column="gene", filter_coeff=0.1, save=True, save_path=None):
+    """
+    Reads two CSV files, extracts the `gene` column, and creates a Venn diagram
+    to show overlapping and non-overlapping genes.
+
+    Parameters:
+        file1 (str): Path to the first CSV file.
+        file2 (str): Path to the second CSV file.
+        gene_column (str): Name of the column containing gene data (default: "gene").
+        filter_coeff (float): Coefficient threshold for filtering genes.
+        save (bool): Whether to save the plot.
+        save_path (str): Path to save the Venn diagram figure.
+
+    Returns:
+        dict: Overlapping and non-overlapping genes.
+    """
+    # Read CSV files
+    df1 = pd.read_csv(file1)
+    df2 = pd.read_csv(file2)
+
+    # Filter based on coefficient
+    if filter_coeff is not None:
+        df1 = df1[df1['coefficient'] > filter_coeff] if filter_coeff >= 0 else df1[df1['coefficient'] < filter_coeff]
+        df2 = df2[df2['coefficient'] > filter_coeff] if filter_coeff >= 0 else df2[df2['coefficient'] < filter_coeff]
+
+    # Extract gene columns and drop NaN values
+    genes1 = set(df1[gene_column].dropna())
+    genes2 = set(df2[gene_column].dropna())
+
+    # Calculate overlapping and non-overlapping genes
+    overlapping_genes = genes1.intersection(genes2)
+    unique_to_file1 = genes1.difference(genes2)
+    unique_to_file2 = genes2.difference(genes1)
+
+    # Create a Venn diagram
+    plt.figure(figsize=(8, 6))
+    venn = venn2([genes1, genes2], ('File 1 Genes', 'File 2 Genes'))
+    plt.title("Venn Diagram of Overlapping Genes")
+
+    # Save or show the figure
+    if save:
+        if save_path is None:
+            raise ValueError("save_path must be provided when save=True.")
+        plt.savefig(save_path, dpi=300, bbox_inches="tight", format='pdf')
+        print(f"Venn diagram saved to {save_path}")
+    else:
+        plt.show()
+
+    # Return the results
+    return {
+        "overlap": list(overlapping_genes),
+        "unique_to_file1": list(unique_to_file1),
+        "unique_to_file2": list(unique_to_file2)
+    }
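Note: a minimal usage sketch for the new create_venn_diagram helper, assuming two hypothetical regression-output CSVs that each contain 'gene' and 'coefficient' columns (the coefficient column is required whenever filter_coeff is not None):

    from spacr.plot import create_venn_diagram

    result = create_venn_diagram(
        file1="/results/screen_A_coefficients.csv",   # hypothetical path
        file2="/results/screen_B_coefficients.csv",   # hypothetical path
        gene_column="gene",
        filter_coeff=0.1,            # keeps coefficient > 0.1; a negative threshold keeps coefficient < threshold
        save=True,
        save_path="/results/venn_overlap.pdf",        # required when save=True
    )
    print(len(result["overlap"]), "genes found in both files")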
spacr/settings.py
CHANGED
@@ -24,15 +24,10 @@ def set_default_plot_merge_settings():
     settings.setdefault('verbose', True)
     return settings
 
-def set_default_settings_preprocess_generate_masks(
-
-
-
-    else:
-        settings.setdefault('src', 'path')
-    if 'src' not in settings:
-        settings['src'] = 'path'
-
+def set_default_settings_preprocess_generate_masks(settings={}):
+
+    settings.setdefault('src', 'path')
+    settings.setdefault('delete_intermediate', False)
     settings.setdefault('segmentation_mode', 'cellpose')
     settings.setdefault('preprocess', True)
     settings.setdefault('masks', True)
@@ -49,6 +44,10 @@ def set_default_settings_preprocess_generate_masks(src, settings={}):
     settings.setdefault('remove_background_cell', False)
     settings.setdefault('remove_background_nucleus', False)
     settings.setdefault('remove_background_pathogen', False)
+
+    settings.setdefault('cell_diamiter', None)
+    settings.setdefault('nucleus_diamiter', None)
+    settings.setdefault('pathogen_diamiter', None)
 
     # Channel settings
     settings.setdefault('cell_channel', None)
@@ -90,7 +89,7 @@ def set_default_settings_preprocess_generate_masks(src, settings={}):
     settings.setdefault('timelapse_frame_limits', None)
     settings.setdefault('timelapse_remove_transient', False)
     settings.setdefault('timelapse_mode', 'trackpy')
-    settings.setdefault('timelapse_objects',
+    settings.setdefault('timelapse_objects', None)
 
     # Misc settings
     settings.setdefault('all_to_mip', False)
@@ -147,12 +146,27 @@ def _get_object_settings(object_type, settings):
         object_settings['filter_size'] = False
         object_settings['filter_intensity'] = False
         object_settings['restore_type'] = settings.get('cell_restore_type', None)
+        if settings['cell_diamiter'] is not None:
+            if isinstance(settings['cell_diamiter'], (int, float)):
+                object_settings['diameter'] = settings['cell_diamiter']
+                object_settings['minimum_size'] = (object_settings['diameter']**2)/4
+                object_settings['maximum_size'] = (object_settings['diameter']**2)*10
+            else:
+                print(f'Cell diameter must be an integer or float, got {settings["cell_diamiter"]}')
 
     elif object_type == 'nucleus':
         object_settings['model_name'] = 'nuclei'
         object_settings['filter_size'] = False
         object_settings['filter_intensity'] = False
         object_settings['restore_type'] = settings.get('nucleus_restore_type', None)
+
+        if settings['nucleus_diamiter'] is not None:
+            if isinstance(settings['nucleus_diamiter'], (int, float)):
+                object_settings['diameter'] = settings['nucleus_diamiter']
+                object_settings['minimum_size'] = (object_settings['diameter']**2)/4
+                object_settings['maximum_size'] = (object_settings['diameter']**2)*10
+            else:
+                print(f'Nucleus diameter must be an integer or float, got {settings["nucleus_diamiter"]}')
 
     elif object_type == 'pathogen':
         object_settings['model_name'] = 'cyto'
@@ -162,6 +176,14 @@ def _get_object_settings(object_type, settings):
         object_settings['restore_type'] = settings.get('pathogen_restore_type', None)
         object_settings['merge'] = settings['merge_pathogens']
 
+        if settings['pathogen_diamiter'] is not None:
+            if isinstance(settings['pathogen_diamiter'], (int, float)):
+                object_settings['diameter'] = settings['pathogen_diamiter']
+                object_settings['minimum_size'] = (object_settings['diameter']**2)/4
+                object_settings['maximum_size'] = (object_settings['diameter']**2)*10
+            else:
+                print(f'Pathogen diameter must be an integer or float, got {settings["pathogen_diamiter"]}')
+
     else:
         print(f'Object type: {object_type} not supported. Supported object types are : cell, nucleus and pathogen')
 
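Note: for all three object types the new diameter handling derives the size bounds from the configured Cellpose diameter: minimum_size = diameter**2 / 4 and maximum_size = diameter**2 * 10. A quick worked example, assuming a hypothetical cell_diamiter of 30 pixels:

    # Assumed setting: settings['cell_diamiter'] = 30
    diameter = 30
    minimum_size = (diameter ** 2) / 4    # 900 / 4  -> 225.0
    maximum_size = (diameter ** 2) * 10   # 900 * 10 -> 9000
    print(minimum_size, maximum_size)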
@@ -216,6 +238,8 @@ def set_default_umap_image_settings(settings={}):
 def get_measure_crop_settings(settings={}):
 
     settings.setdefault('src', 'path')
+    settings.setdefault('delete_intermediate', False)
+
     settings.setdefault('verbose', False)
     settings.setdefault('experiment', 'exp')
 
@@ -339,7 +363,7 @@ def set_default_train_test_model(settings):
 def set_generate_training_dataset_defaults(settings):
 
     settings.setdefault('src','path')
-    settings.setdefault('tables',['cell', 'nucleus', 'pathogen', 'cytoplasm'])
+    settings.setdefault('tables', ['cell', 'nucleus', 'pathogen', 'cytoplasm'])
     settings.setdefault('dataset_mode','metadata')
     settings.setdefault('annotation_column','test')
     settings.setdefault('annotated_classes',[1,2])
@@ -457,7 +481,7 @@ def get_analyze_recruitment_default_settings(settings):
     settings.setdefault('pathogen_plate_metadata',[['c1', 'c2', 'c3'],['c4','c5', 'c6']])
     settings.setdefault('treatments',['cm', 'lovastatin'])
     settings.setdefault('treatment_plate_metadata',[['r1', 'r2','r3'], ['r4', 'r5','r6']])
-    settings.setdefault('metadata_types',['column_name', 'column_name', 'row_name'])
+    #settings.setdefault('metadata_types',['column_name', 'column_name', 'row_name'])
     settings.setdefault('channel_dims',[0,1,2,3])
     settings.setdefault('cell_chann_dim',3)
     settings.setdefault('cell_mask_dim',4)
@@ -545,6 +569,7 @@ def get_perform_regression_default_settings(settings):
     settings.setdefault('log_x',False)
     settings.setdefault('log_y',False)
     settings.setdefault('x_lim',None)
+    settings.setdefault('outlier_detection',True)
     settings.setdefault('agg_type','mean')
     settings.setdefault('min_cell_count',None)
     settings.setdefault('regression_type','ols')
@@ -908,17 +933,25 @@ expected_types = {
     "offset_start":int,
     "chunk_size":int,
     "single_direction":str,
+    "delete_intermediate":bool,
+    "outlier_detection":bool,
+    "CP_prob":int,
+    "diameter":int,
+    "flow_threshold":float,
+    "cell_diamiter":int,
+    "nucleus_diamiter":int,
+    "pathogen_diamiter":int
 }
 
 categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset","model_path","grna_csv","row_csv","column_csv", "metadata_files", "score_data","count_data"],
-              "General": ["metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode"],
+              "General": ["metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode", "delete_intermediate"],
               "Cellpose":["fill_in","from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "invert", "diameter", "grayscale", "Signal_to_noise", "resize", "target_height", "target_width"],
-              "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "uninfected", "merge_edge_pathogen_cells", "adjust_cells", "cells", "cell_loc"],
-              "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
-              "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
+              "Cell": ["cell_diamiter","cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "uninfected", "merge_edge_pathogen_cells", "adjust_cells", "cells", "cell_loc"],
+              "Nucleus": ["nucleus_diamiter","nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
+              "Pathogen": ["pathogen_diamiter","pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
               "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
               "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "normalize", "use_bounding_box"],
-              "Sequencing": ["offset_start","chunk_size","single_direction", "signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
+              "Sequencing": ["outlier_detection","offset_start","chunk_size","single_direction", "signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
               "Generate Dataset":["save_to_db","file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
               "Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
               "Hyperparamiters (Embedding)": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
@@ -1032,6 +1065,9 @@ def generate_fields(variables, scrollable_frame):
     row = 1
     vars_dict = {}
     tooltips = {
+        "cell_diamiter": "(int) - Diameter for cellpose objects to segment.",
+        "nucleus_diamiter": "(int) - Diameter for cellpose objects to segment.",
+        "pathogen_diamiter": "(int) - Diameter for cellpose objects to segment.",
         "adjust_cells": "(bool) - Adjust cell parameters for better segmentation.",
         "agg_type": "(str) - Type of aggregation to use for the data.",
         "alpha": "(float) - Alpha parameter for the regression model.",