spacr 0.3.80__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +0 -4
- spacr/core.py +27 -13
- spacr/deep_spacr.py +378 -5
- spacr/gui_core.py +82 -20
- spacr/gui_elements.py +192 -3
- spacr/gui_utils.py +1 -1
- spacr/io.py +5 -176
- spacr/measure.py +10 -6
- spacr/ml.py +369 -46
- spacr/plot.py +201 -90
- spacr/settings.py +80 -21
- spacr/submodules.py +282 -1
- spacr/toxo.py +98 -75
- spacr/utils.py +144 -49
- {spacr-0.3.80.dist-info → spacr-0.4.0.dist-info}/METADATA +2 -1
- {spacr-0.3.80.dist-info → spacr-0.4.0.dist-info}/RECORD +20 -20
- {spacr-0.3.80.dist-info → spacr-0.4.0.dist-info}/LICENSE +0 -0
- {spacr-0.3.80.dist-info → spacr-0.4.0.dist-info}/WHEEL +0 -0
- {spacr-0.3.80.dist-info → spacr-0.4.0.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.80.dist-info → spacr-0.4.0.dist-info}/top_level.txt +0 -0
spacr/utils.py
CHANGED
@@ -1372,40 +1372,6 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
|
|
1372
1372
|
|
1373
1373
|
return df
|
1374
1374
|
|
1375
|
-
def _split_data_v1(df, group_by, object_type):
|
1376
|
-
"""
|
1377
|
-
Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
|
1378
|
-
and returns the grouped dataframes.
|
1379
|
-
|
1380
|
-
Parameters:
|
1381
|
-
df (pandas.DataFrame): The input dataframe.
|
1382
|
-
group_by (str): The column name to group the dataframes by.
|
1383
|
-
object_type (str): The column name to concatenate with 'prcf' to create a new column 'prcfo'.
|
1384
|
-
|
1385
|
-
Returns:
|
1386
|
-
grouped_numeric (pandas.DataFrame): The grouped dataframe containing numeric columns.
|
1387
|
-
grouped_non_numeric (pandas.DataFrame): The grouped dataframe containing non-numeric columns.
|
1388
|
-
"""
|
1389
|
-
|
1390
|
-
if 'prcf' not in df.columns:
|
1391
|
-
try:
|
1392
|
-
df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
|
1393
|
-
except Exception as e:
|
1394
|
-
print(e)
|
1395
|
-
|
1396
|
-
df['prcfo'] = df['prcf'] + '_' + df[object_type]
|
1397
|
-
df = df.set_index(group_by, inplace=False)
|
1398
|
-
|
1399
|
-
df_numeric = df.select_dtypes(include=np.number)
|
1400
|
-
df_non_numeric = df.select_dtypes(exclude=np.number)
|
1401
|
-
|
1402
|
-
[]
|
1403
|
-
|
1404
|
-
grouped_numeric = df_numeric.groupby(df_numeric.index).mean()
|
1405
|
-
grouped_non_numeric = df_non_numeric.groupby(df_non_numeric.index).first()
|
1406
|
-
|
1407
|
-
return pd.DataFrame(grouped_numeric), pd.DataFrame(grouped_non_numeric)
|
1408
|
-
|
1409
1375
|
def _split_data(df, group_by, object_type):
|
1410
1376
|
"""
|
1411
1377
|
Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
|
@@ -5045,19 +5011,22 @@ def generate_cytoplasm_mask(nucleus_mask, cell_mask):
|
|
5045
5011
|
return cytoplasm_mask
|
5046
5012
|
|
5047
5013
|
def add_column_to_database(settings):
|
5048
|
-
"""
|
5049
|
-
Adds a new column to the database table by matching on a common column from the DataFrame.
|
5050
|
-
If the column already exists in the database, it adds the column with a suffix.
|
5051
|
-
NaN values will remain as NULL in the database.
|
5052
|
-
|
5053
|
-
Parameters:
|
5054
|
-
|
5055
|
-
|
5056
|
-
|
5057
|
-
|
5058
|
-
|
5059
|
-
|
5060
|
-
|
5014
|
+
#"""
|
5015
|
+
#Adds a new column to the database table by matching on a common column from the DataFrame.
|
5016
|
+
#If the column already exists in the database, it adds the column with a suffix.
|
5017
|
+
#NaN values will remain as NULL in the database.
|
5018
|
+
|
5019
|
+
#Parameters:
|
5020
|
+
# settings (dict): A dictionary containing the following keys:
|
5021
|
+
# csv_path (str): Path to the CSV file with the data to be added.
|
5022
|
+
# db_path (str): Path to the SQLite database (or connection string for other databases).
|
5023
|
+
# table_name (str): The name of the table in the database.
|
5024
|
+
# update_column (str): The name of the new column in the DataFrame to add to the database.
|
5025
|
+
# match_column (str): The common column used to match rows.
|
5026
|
+
|
5027
|
+
#Returns:
|
5028
|
+
# None
|
5029
|
+
#"""
|
5061
5030
|
|
5062
5031
|
# Read the DataFrame from the provided CSV path
|
5063
5032
|
df = pd.read_csv(settings['csv_path'])
|
@@ -5147,8 +5116,8 @@ def correct_metadata_column_names(df):
|
|
5147
5116
|
df = df.rename(columns={'plate_name': 'plate'})
|
5148
5117
|
if 'column_name' in df.columns:
|
5149
5118
|
df = df.rename(columns={'column_name': 'column'})
|
5150
|
-
if '
|
5151
|
-
df = df.rename(columns={'
|
5119
|
+
if 'col' in df.columns:
|
5120
|
+
df = df.rename(columns={'col': 'column'})
|
5152
5121
|
if 'row_name' in df.columns:
|
5153
5122
|
df = df.rename(columns={'row_name': 'row_name'})
|
5154
5123
|
if 'grna_name' in df.columns:
|
@@ -5242,3 +5211,129 @@ def group_feature_class(df, feature_groups=['cell', 'cytoplasm', 'nucleus', 'pat
|
|
5242
5211
|
, ignore_index=True)
|
5243
5212
|
|
5244
5213
|
return df
|
5214
|
+
|
5215
|
+
def delete_intermedeate_files(settings):
    """
    Delete intermediate processing folders once the merged stacks are complete.

    The folders 'stack', 'norm_channel_stack' and '1' through '10' inside
    settings['src'] are removed, but only when:
      * settings contains a 'src' key and that path exists,
      * the 'orig' folder exists inside it, and
      * the 'merged' and 'stack' folders hold the same, non-zero number of
        entries.

    The 'orig' and 'merged' folders are never deleted.

    Parameters:
        settings (dict): Must contain 'src' (str), the experiment folder.

    Returns:
        None
    """

    def _entry_count(folder):
        # A missing or unreadable folder counts as empty so the guard fails closed.
        try:
            return len(os.listdir(folder))
        except OSError:
            return 0

    # Check for the key before reading it; reading first would raise KeyError.
    if 'src' not in settings:
        print("No 'src' key in settings dictionary.")
        return

    src = settings['src']
    if not os.path.exists(src):
        print(f"{src} does not exist.")
        return

    path_orig = os.path.join(src, 'orig')
    if not os.path.exists(path_orig):
        print(f"{path_orig} does not exist.")
        return

    path_stack = os.path.join(src, 'stack')
    merged_stack = os.path.join(src, 'merged')

    # Compare the number of entries in each folder, not the length of the
    # path strings (which can never match for 'merged' vs 'stack').
    merged_len = _entry_count(merged_stack)
    stack_len = _entry_count(path_stack)
    if merged_len != stack_len or stack_len == 0:
        print(f"Skipped deletion: {merged_stack} has {merged_len} entries, {path_stack} has {stack_len}.")
        return

    folder_names = ['stack', 'norm_channel_stack'] + [str(i) for i in range(1, 11)]
    for path in (os.path.join(src, name) for name in folder_names):
        if not os.path.exists(path):
            continue
        try:
            shutil.rmtree(path)
            print(f"Deleted {path}")
        except OSError as e:
            print(f"{path} could not be deleted: {e}. Delete manually.")
|
5254
|
+
|
5255
|
+
def filter_and_save_csv(input_csv, output_csv, column_name, upper_threshold, lower_threshold):
    """
    Read a CSV, keep the rows whose value in `column_name` lies outside the
    band [lower_threshold, upper_threshold], and save them to a new CSV file.

    A row is retained when its value is greater than `upper_threshold` OR
    less than `lower_threshold`. Rows with NaN in the column are dropped
    because both comparisons are False for NaN.

    Parameters:
        input_csv (str): Path to the input CSV file.
        output_csv (str): Path to save the filtered CSV file.
        column_name (str): Column name to apply the filters on.
        upper_threshold (float): Values greater than this are retained.
        lower_threshold (float): Values less than this are retained.

    Returns:
        None
    """
    # Read the input CSV file into a DataFrame
    df = pd.read_csv(input_csv)

    # Keep only the rows outside the threshold band
    values = df[column_name]
    filtered_df = df[(values > upper_threshold) | (values < lower_threshold)]

    # Save the filtered DataFrame to a new CSV file
    filtered_df.to_csv(output_csv, index=False)

    # `display` exists only under IPython/Jupyter (or when imported by the
    # module); fall back to print so the function also works in plain scripts.
    try:
        show = display
    except NameError:
        show = print
    show(filtered_df)

    print(f"Filtered DataFrame saved to {output_csv}")
|
5281
|
+
|
5282
|
+
def extract_tar_bz2_files(folder_path):
    """
    Extract every .tar.bz2 file in a folder into a subfolder named after the archive.

    Each archive 'name.tar.bz2' is extracted into '<folder_path>/name'.
    Extraction is best-effort: a failure on one archive is printed and the
    remaining archives are still processed. Archive members that would be
    written outside the extraction folder cause that archive to be rejected.

    Parameters:
        folder_path (str): Path to the folder containing .tar.bz2 files.

    Raises:
        ValueError: If `folder_path` is not an existing directory.
    """
    if not os.path.isdir(folder_path):
        raise ValueError(f"The provided path '{folder_path}' is not a valid folder.")

    # Extraction filters are available on Python 3.12+ and patched older releases.
    supports_filter = hasattr(tarfile, 'data_filter')

    # Iterate over files in the folder
    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.tar.bz2'):
            continue

        file_path = os.path.join(folder_path, file_name)
        # Strip both extensions ('.bz2', then '.tar') to get the folder name
        base_name = os.path.splitext(os.path.splitext(file_name)[0])[0]
        extract_folder = os.path.join(folder_path, base_name)

        # Create the subfolder for extraction if it doesn't exist
        os.makedirs(extract_folder, exist_ok=True)

        # Extract the tar.bz2 file
        try:
            with tarfile.open(file_path, 'r:bz2') as tar:
                if supports_filter:
                    # The 'data' filter blocks path traversal, absolute paths
                    # and links that point outside the destination.
                    tar.extractall(path=extract_folder, filter='data')
                else:
                    # No filter support: refuse archives whose members would
                    # resolve outside the extraction folder.
                    root = os.path.realpath(extract_folder)
                    for member in tar.getmembers():
                        target = os.path.realpath(os.path.join(root, member.name))
                        if os.path.commonpath([root, target]) != root:
                            raise ValueError(f"unsafe path in archive: {member.name}")
                    tar.extractall(path=extract_folder)
            print(f"Extracted: {file_name} -> {extract_folder}")
        except Exception as e:
            print(f"Failed to extract {file_name}: {e}")
|
5308
|
+
|
5309
|
+
|
5310
|
+
def calculate_shortest_distance(df, object1, object2, channel=0):
    """
    Estimate the shortest edge-to-edge distance between two objects (e.g., pathogen and nucleus).

    The estimate is the centroid-to-centroid Euclidean distance minus the two
    object "radii", where each radius is half of the object's maximum Feret
    diameter. This treats both objects as circles, so it is an approximation
    and not a true contour-to-contour distance. Negative results (overlapping
    circles) are clamped to 0.

    Parameters:
    - df: Pandas DataFrame containing measurements. It is modified in place.
    - object1: String, name of the first object (e.g., "pathogen")
    - object2: String, name of the second object (e.g., "nucleus")
    - channel: Channel index used in the weighted-centroid column names
      ('<object>_channel_<channel>_centroid_weighted-0/1'). Defaults to 0.

    Returns:
    - df: The same DataFrame with a new column
      '<object1>_<object2>_shortest_distance'.

    Raises:
    - KeyError: If any required centroid or Feret-diameter column is missing.
    """

    def _columns(obj):
        prefix = f'{obj}_channel_{channel}_centroid_weighted'
        return f'{prefix}-0', f'{prefix}-1', f'{obj}_feret_diameter_max'

    obj1_c0, obj1_c1, obj1_feret = _columns(object1)
    obj2_c0, obj2_c1, obj2_feret = _columns(object2)

    # Report every missing column at once instead of failing on the first lookup
    required = [obj1_c0, obj1_c1, obj1_feret, obj2_c0, obj2_c1, obj2_feret]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Compute centroid-to-centroid Euclidean distance
    centroid_distance = np.hypot(df[obj1_c0] - df[obj2_c0], df[obj1_c1] - df[obj2_c1])

    # Estimate object radii using Feret diameters
    object1_radius = df[obj1_feret] / 2
    object2_radius = df[obj2_feret] / 2

    # Compute shortest edge-to-edge distance
    shortest_distance = centroid_distance - (object1_radius + object2_radius)

    # Ensure distances are non-negative (overlapping objects should have distance 0)
    df[f'{object1}_{object2}_shortest_distance'] = np.maximum(shortest_distance, 0)

    return df
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: spacr
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Spatial phenotype analysis of crisp screens (SpaCr)
|
5
5
|
Home-page: https://github.com/EinarOlafsson/spacr
|
6
6
|
Author: Einar Birnir Olafsson
|
@@ -32,6 +32,7 @@ Requires-Dist: monai>=1.3.0
|
|
32
32
|
Requires-Dist: captum<1.0,>=0.7.0
|
33
33
|
Requires-Dist: seaborn<1.0,>=0.13.2
|
34
34
|
Requires-Dist: matplotlib<4.0,>=3.8.3
|
35
|
+
Requires-Dist: matplotlib-venn<2.0,>=1.1
|
35
36
|
Requires-Dist: adjustText<2.0,>=1.2.0
|
36
37
|
Requires-Dist: bottleneck<2.0,>=1.3.6
|
37
38
|
Requires-Dist: numexpr<3.0,>=2.8.4
|
@@ -1,4 +1,4 @@
|
|
1
|
-
spacr/__init__.py,sha256=
|
1
|
+
spacr/__init__.py,sha256=KgkkUyqbm4kh8bwxWeFpp4rilNE0y0RkeylPvS-cTLY,1395
|
2
2
|
spacr/__main__.py,sha256=bkAJJD2kjIqOP-u1kLvct9jQQCeUXzlEjdgitwi1Lm8,75
|
3
3
|
spacr/app_annotate.py,sha256=W9eLPa_LZIvXsXx_-0iDFEU938LBDvRy6prXo0qF4KQ,2533
|
4
4
|
spacr/app_classify.py,sha256=urTP_wlZ58hSyM5a19slYlBxN0PdC-9-ga0hvq8CGWc,165
|
@@ -9,27 +9,27 @@ spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
|
|
9
9
|
spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
|
10
10
|
spacr/cellpose.py,sha256=RBHMs2vwXcfkj0xqAULpALyzJYXddSRycgZSzmwI7v0,14755
|
11
11
|
spacr/chat_bot.py,sha256=n3Fhqg3qofVXHmh3H9sUcmfYy9MmgRnr48663MVdY9E,1244
|
12
|
-
spacr/core.py,sha256=
|
13
|
-
spacr/deep_spacr.py,sha256=
|
12
|
+
spacr/core.py,sha256=lKeqmsVrGQ8cPU_WkoNGNBWrk-gtR1RkRkwDdnJ0u64,48829
|
13
|
+
spacr/deep_spacr.py,sha256=AsAbehxPChAq65YVPuFqCt5JabdeO8AwVWZmcmyLeFw,58020
|
14
14
|
spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
|
15
|
-
spacr/gui_core.py,sha256=
|
16
|
-
spacr/gui_elements.py,sha256=
|
17
|
-
spacr/gui_utils.py,sha256=
|
18
|
-
spacr/io.py,sha256=
|
15
|
+
spacr/gui_core.py,sha256=U0A7waKgWq_Es9fMwcZbXUZYGzCqt2bgfY3HbxiFXnw,47466
|
16
|
+
spacr/gui_elements.py,sha256=HmITDncklKwtdFhxLhtYXOwndsRfgwWIPVi83VlXHB4,146419
|
17
|
+
spacr/gui_utils.py,sha256=0rDF23BUGcmjSJvfCiLoxhlGJdHkio1jTxyCzrMXr-g,41211
|
18
|
+
spacr/io.py,sha256=oqJwDJWksVdWE0bRAwytTOsjlL0o-J9lr_pQaw2cQ4Y,138288
|
19
19
|
spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
|
20
|
-
spacr/measure.py,sha256=
|
20
|
+
spacr/measure.py,sha256=jmOnLBudq3TuY723Cfo1EJBn67P6rlEvL6I-2FSkUgI,55315
|
21
21
|
spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
|
22
|
-
spacr/ml.py,sha256=
|
22
|
+
spacr/ml.py,sha256=MrIAtUUxMOibWVL1SjCUnYlizawCp3l3SeY4Y9yEsPw,97251
|
23
23
|
spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
|
24
|
-
spacr/plot.py,sha256=
|
24
|
+
spacr/plot.py,sha256=p4AY5EWmVNPMqFD0I4NARJkvJA5gTErbDDYqDMU18pc,169479
|
25
25
|
spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
|
26
|
-
spacr/settings.py,sha256=
|
26
|
+
spacr/settings.py,sha256=e6QWZ5o6Im02_t-3GQh3H4kksMTQmIZ1Rbh3BeQmmsw,84000
|
27
27
|
spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
|
28
28
|
spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
|
29
|
-
spacr/submodules.py,sha256=
|
29
|
+
spacr/submodules.py,sha256=e_JNMGBHakEra_2pstHFmgI1NhF9TybfvTNDAegVsl0,67626
|
30
30
|
spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
|
31
|
-
spacr/toxo.py,sha256=
|
32
|
-
spacr/utils.py,sha256=
|
31
|
+
spacr/toxo.py,sha256=TmuhejSIPLBvsgeblsUgSvBFCR1gOkApyTKidooJ5Us,26044
|
32
|
+
spacr/utils.py,sha256=m4MZiNbmQpZD78eu5DNmxk4cU_tk2VeVLCe_R_7YShY,226287
|
33
33
|
spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
|
34
34
|
spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
|
35
35
|
spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
|
@@ -152,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
|
|
152
152
|
spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
|
153
153
|
spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
|
154
154
|
spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
|
155
|
-
spacr-0.
|
156
|
-
spacr-0.
|
157
|
-
spacr-0.
|
158
|
-
spacr-0.
|
159
|
-
spacr-0.
|
160
|
-
spacr-0.
|
155
|
+
spacr-0.4.0.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
|
156
|
+
spacr-0.4.0.dist-info/METADATA,sha256=uloYFy8DrWtHZvy5W47jBRNUEF_SeX4nLZ0OL2wMizc,6072
|
157
|
+
spacr-0.4.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
158
|
+
spacr-0.4.0.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
|
159
|
+
spacr-0.4.0.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
|
160
|
+
spacr-0.4.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|