spacr 0.4.15__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +2 -2
- spacr/core.py +52 -10
- spacr/deep_spacr.py +2 -3
- spacr/gui.py +0 -1
- spacr/gui_core.py +247 -41
- spacr/gui_elements.py +133 -2
- spacr/gui_utils.py +22 -17
- spacr/io.py +624 -149
- spacr/ml.py +141 -258
- spacr/plot.py +76 -34
- spacr/resources/MEDIAR/__pycache__/SetupDict.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/__pycache__/evaluate.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/__pycache__/generate_mapping.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/__pycache__/main.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/Baseline/__pycache__/Predictor.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/Baseline/__pycache__/Trainer.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/Baseline/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/Baseline/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/EnsemblePredictor.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/Predictor.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/Trainer.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/MEDIAR/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/__pycache__/BasePredictor.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/__pycache__/BaseTrainer.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/core/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/__pycache__/measures.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/datasetter.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/transforms.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/utils.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/CellAware.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/LoadImage.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/NormalizeImage.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/models/__pycache__/MEDIARFormer.cpython-39.pyc +0 -0
- spacr/resources/MEDIAR/train_tools/models/__pycache__/__init__.cpython-39.pyc +0 -0
- spacr/sequencing.py +73 -38
- spacr/settings.py +161 -135
- spacr/submodules.py +618 -215
- spacr/timelapse.py +197 -29
- spacr/toxo.py +23 -23
- spacr/utils.py +186 -128
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/METADATA +5 -2
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/RECORD +53 -24
- spacr/stats.py +0 -221
- /spacr/{cellpose.py → spacr_cellpose.py} +0 -0
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/LICENSE +0 -0
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/WHEEL +0 -0
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/entry_points.txt +0 -0
- {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/top_level.txt +0 -0
spacr/utils.py
CHANGED
@@ -1,5 +1,4 @@
 import os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess, time, requests, ast, traceback
-
 import numpy as np
 import pandas as pd
 from cellpose import models as cp_models
@@ -78,7 +77,7 @@ def filepaths_to_database(img_paths, settings, source_folder, crop_mode):
 
     parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=settings['timelapse'])))
 
-    columns = ['
+    columns = ['plateID', 'rowID', 'columnID', 'fieldID']
 
     if settings['timelapse']:
         columns = columns + ['time_id']
@@ -113,7 +112,7 @@ def activation_maps_to_database(img_paths, source_folder, settings):
     png_df = pd.DataFrame(img_paths, columns=['png_path'])
     png_df['file_name'] = png_df['png_path'].apply(lambda x: os.path.basename(x))
     parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=False)))
-    columns = ['
+    columns = ['plateID', 'rowID', 'columnID', 'fieldID', 'prcfo', 'object']
     png_df[columns] = parts
 
     dataset_name = os.path.splitext(os.path.basename(settings['dataset']))[0]
@@ -136,7 +135,7 @@ def activation_correlations_to_database(df, img_paths, source_folder, settings):
     png_df = pd.DataFrame(img_paths, columns=['png_path'])
     png_df['file_name'] = png_df['png_path'].apply(lambda x: os.path.basename(x))
     parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=False)))
-    columns = ['
+    columns = ['plateID', 'rowID', 'columnID', 'fieldID', 'prcfo', 'object']
     png_df[columns] = parts
 
     # Align both DataFrames by file_name
@@ -319,18 +318,26 @@ def load_settings(csv_file_path, show=False, setting_key='setting_key', setting_
 
     return result_dict
 
-
 def save_settings(settings, name='settings', show=False):
 
-
-    if show:
-        display(settings_df)
+    settings_2 = settings.copy()
 
-    if isinstance(
-        src =
+    if isinstance(settings_2['src'], list):
+        src = settings_2['src'][0]
         name = f"{name}_list"
     else:
-        src =
+        src = settings_2['src']
+
+    if 'test_mode' in settings_2.keys():
+        settings_2['test_mode'] = False
+
+    if 'plot' in settings_2.keys():
+        settings_2['plot'] = False
+
+    settings_df = pd.DataFrame(list(settings_2.items()), columns=['Key', 'Value'])
+
+    if show:
+        display(settings_df)
 
     settings_csv = os.path.join(src,'settings',f'{name}.csv')
     os.makedirs(os.path.join(src,'settings'), exist_ok=True)
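The rewritten save_settings above now writes a sanitized copy of the settings dict (test_mode and plot forced to False) rather than the live dict. A minimal usage sketch; the paths and keys are invented for illustration:

    # Hypothetical call illustrating the new save_settings behavior.
    settings = {'src': ['/data/plate1', '/data/plate2'], 'test_mode': True, 'plot': True}
    save_settings(settings, name='preprocess')
    # Writes /data/plate1/settings/preprocess_list.csv with test_mode and plot
    # stored as False; the caller's dict is untouched because the function
    # operates on settings.copy().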
@@ -546,10 +553,10 @@ def _get_cellpose_batch_size():
     except Exception as e:
         return 8
 
-def _extract_filename_metadata(filenames, src, regular_expression, metadata_type='cellvoyager'
+def _extract_filename_metadata(filenames, src, regular_expression, metadata_type='cellvoyager'):
 
     images_by_key = defaultdict(list)
-
+
     for filename in filenames:
         match = regular_expression.match(filename)
         if match:
@@ -560,39 +567,44 @@ def _extract_filename_metadata(filenames, src, regular_expression, metadata_type
                 plate = os.path.basename(src)
 
                 well = match.group('wellID')
-                field = match.group('fieldID')
-                channel = match.group('chanID')
-                mode = None
-
                 if well[0].isdigit():
                     well = str(_safe_int_convert(well))
+
+                field = match.group('fieldID')
                 if field[0].isdigit():
                     field = str(_safe_int_convert(field))
+
+                channel = match.group('chanID')
                 if channel[0].isdigit():
                     channel = str(_safe_int_convert(channel))
-
+
+                if 'timeID' in match.groupdict():
+                    timeID = match.group('timeID')
+                    if timeID[0].isdigit():
+                        timeID = str(_safe_int_convert(timeID))
+                else:
+                    timeID = None
+
+                if 'sliceID' in match.groupdict():
+                    sliceID = match.group('sliceID')
+                    if sliceID[0].isdigit():
+                        sliceID = str(_safe_int_convert(sliceID))
+                else:
+                    sliceID = None
+
                 if metadata_type =='cq1':
                     orig_wellID = wellID
                     wellID = _convert_cq1_well_id(wellID)
                     print(f'Converted Well ID: {orig_wellID} to {wellID}', end='\r', flush=True)
 
-
-
-                mode = match.group('AID')
-            except IndexError:
-                sliceid = '00'
-
-                if mode == skip_mode:
-                    continue
-
-                key = (plate, well, field, channel, mode)
-                file_path = os.path.join(src, filename) # Store the full path
+                key = (plate, well, field, channel, timeID, sliceID)
+                file_path = os.path.join(src, filename)
                 images_by_key[key].append(file_path)
 
             except IndexError:
                 print(f"Could not extract information from filename {filename} using provided regex")
         else:
-            print(f"Filename {filename} did not match provided regex")
+            print(f"Filename {filename} did not match provided regex: {regular_expression}")
             continue
 
     return images_by_key
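The net effect of the two hunks above is that the grouping key now carries time and z-slice information when the regex provides it. A small self-contained illustration using the cq1 pattern from _get_regex, with img_format = '.tif' (the filename is made up):

    import re

    # cq1-style pattern as defined in _get_regex.
    regex = re.compile(r'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*).tif')
    m = regex.match('W0002F0001T0003Z001C2.tif')
    print(m.groupdict())
    # {'wellID': '0002', 'fieldID': '0001', 'timeID': '0003', 'sliceID': '001', 'chanID': '2'}
    # The key tuple becomes (plate, well, field, channel, timeID, sliceID), so
    # timelapse frames and z-slices no longer collapse onto one key.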
@@ -634,11 +646,11 @@ def _update_database_with_merged_info(db_path, df, table='png_list', columns=['p
     if 'prcfo' not in df.columns:
         print(f'generating prcfo columns')
         try:
-            df['prcfo'] = df['
+            df['prcfo'] = df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_' + df['columnID'].astype(str) + '_' + df['fieldID'].astype(str) + '_o' + df['object_label'].astype(int).astype(str)
         except Exception as e:
             print('Merging on cell failed, trying with cell_id')
             try:
-                df['prcfo'] = df['
+                df['prcfo'] = df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_' + df['columnID'].astype(str) + '_' + df['fieldID'].astype(str) + '_o' + df['cell_id'].astype(int).astype(str)
             except Exception as e:
                 print(e)
 
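For reference, this is what the new prcfo identifier looks like on a toy frame (all values invented):

    import pandas as pd

    df = pd.DataFrame({'plateID': ['plate1'], 'rowID': ['r1'], 'columnID': ['c3'],
                       'fieldID': ['f2'], 'object_label': [7.0]})
    df['prcfo'] = (df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_'
                   + df['columnID'].astype(str) + '_' + df['fieldID'].astype(str)
                   + '_o' + df['object_label'].astype(int).astype(str))
    print(df['prcfo'].iloc[0])  # plate1_r1_c3_f2_o7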
@@ -730,7 +742,7 @@ def _map_values(row, values, locs):
     if locs:
         value_dict = {loc: value for value, loc_list in zip(values, locs) for loc in loc_list}
         # Determine if we're dealing with row or column based on first location identifier
-        type_ = '
+        type_ = 'rowID' if locs[0][0][0] == 'r' else 'columnID'
         return value_dict.get(row[type_], None)
     return values[0] if values else None
 
@@ -915,21 +927,21 @@ def _merge_and_save_to_database(morph_df, intensity_df, table_type, source_folde
     merged_df['file_name'] = file_name
     merged_df['path_name'] = os.path.join(source_folder, file_name + '.npy')
     if timelapse:
-        merged_df[['
+        merged_df[['plateID', 'rowID', 'columnID', 'fieldID', 'timeid', 'prcf']] = merged_df['file_name'].apply(lambda x: pd.Series(_map_wells(x, timelapse)))
     else:
-        merged_df[['
+        merged_df[['plateID', 'rowID', 'columnID', 'fieldID', 'prcf']] = merged_df['file_name'].apply(lambda x: pd.Series(_map_wells(x, timelapse)))
     cols = merged_df.columns.tolist() # get the list of all columns
     if table_type == 'cell' or table_type == 'cytoplasm':
-        column_list = ['object_label', '
+        column_list = ['object_label', 'plateID', 'rowID', 'columnID', 'fieldID', 'prcf', 'file_name', 'path_name']
     elif table_type == 'nucleus' or table_type == 'pathogen':
-        column_list = ['object_label', 'cell_id', '
+        column_list = ['object_label', 'cell_id', 'plateID', 'rowID', 'columnID', 'fieldID', 'prcf', 'file_name', 'path_name']
     else:
         raise ValueError(f"Invalid table_type: {table_type}")
     # Check if all columns in column_list are in cols
     missing_columns = [col for col in column_list if col not in cols]
     if len(missing_columns) == 1 and missing_columns[0] == 'cell_id':
         missing_columns = False
-        column_list = ['object_label', '
+        column_list = ['object_label', 'plateID', 'rowID', 'columnID', 'fieldID', 'prcf', 'file_name', 'path_name']
     if missing_columns:
         raise ValueError(f"Columns missing in DataFrame: {missing_columns}")
     for i, col in enumerate(column_list):
@@ -1143,43 +1155,6 @@ def _masks_to_masks_stack(masks):
     for idx, mask in enumerate(masks):
         mask_stack.append(mask)
     return mask_stack
-
-def _get_diam_v1(mag, obj):
-
-    if mag == 20:
-        if obj == 'cell':
-            diamiter = 120
-        elif obj == 'nucleus':
-            diamiter = 60
-        elif obj == 'pathogen':
-            diamiter = 20
-        else:
-            raise ValueError("Invalid magnification: Use 20, 40 or 60")
-
-    elif mag == 40:
-        if obj == 'cell':
-            diamiter = 160
-        elif obj == 'nucleus':
-            diamiter = 80
-        elif obj == 'pathogen':
-            diamiter = 40
-        else:
-            raise ValueError("Invalid magnification: Use 20, 40 or 60")
-
-    elif mag == 60:
-        if obj == 'cell':
-            diamiter = 200
-        if obj == 'nucleus':
-            diamiter = 90
-        if obj == 'pathogen':
-            diamiter = 60
-        else:
-            raise ValueError("Invalid magnification: Use 20, 40 or 60")
-
-    else:
-        raise ValueError("Invalid magnification: Use 20, 40 or 60")
-
-    return diamiter
 
 def _get_diam(mag, obj):
 
@@ -1339,11 +1314,11 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
     """
 
     def _get_type(val):
-        """Determine if a value maps to '
+        """Determine if a value maps to 'rowID' or 'columnID'."""
         if isinstance(val, str) and val.startswith('c'):
-            return '
+            return 'columnID'
         elif isinstance(val, str) and val.startswith('r'):
-            return '
+            return 'rowID'
         return None
 
     def _map_or_default(column_name, values, loc, df):
@@ -1411,7 +1386,7 @@ def _split_data(df, group_by, object_type):
     # Ensure 'prcf' column exists by concatenating specific columns
     if 'prcf' not in df.columns:
         try:
-            df['prcf'] = df['
+            df['prcf'] = df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_' + df['columnID'].astype(str) + '_' + df['fieldID'].astype(str)
         except Exception as e:
             print(e)
 
@@ -1508,7 +1483,7 @@ def _group_by_well(df):
     non_numeric_cols = df.select_dtypes(include=['object']).columns
 
     # Apply mean function to numeric columns and first to non-numeric
-    df_grouped = df.groupby(['
+    df_grouped = df.groupby(['plateID', 'rowID', 'columnID']).agg({**{col: np.mean for col in numeric_cols}, **{col: 'first' for col in non_numeric_cols}})
     return df_grouped
 
 ###################################################
@@ -2187,11 +2162,11 @@ def augment_classes(dst, nc, pc, generate=True,move=True):
 def annotate_predictions(csv_loc):
     df = pd.read_csv(csv_loc)
     df['filename'] = df['path'].apply(lambda x: x.split('/')[-1])
-    df[['
+    df[['plateID', 'well', 'fieldID', 'object']] = df['filename'].str.split('_', expand=True)
     df['object'] = df['object'].str.replace('.png', '')
 
     def assign_condition(row):
-        plate = int(row['
+        plate = int(row['plateID'])
         col = int(row['well'][1:])
 
         if col > 3:
@@ -2342,7 +2317,7 @@ def check_multicollinearity(x):
 
 def lasso_reg(merged_df, alpha_value=0.01, reg_type='lasso'):
     # Separate predictors and response
-    X = merged_df[['gene', 'grna', '
+    X = merged_df[['gene', 'grna', 'plateID', 'rowID', 'columnID']]
     y = merged_df['pred']
 
     # One-hot encode the categorical predictors
@@ -3120,12 +3095,8 @@ def _get_regex(metadata_type, img_format, custom_regex=None):
         regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
     elif metadata_type == 'cq1':
        regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-    elif metadata_type == '
-        regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID
-    elif metadata_type == 'zeis':
-        regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-    elif metadata_type == 'leica':
-        regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+    elif metadata_type == 'auto':
+        regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>.*)C(?P<chanID>.*).tif'
     elif metadata_type == 'custom':
         regex = f'({custom_regex}){img_format}'
 
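To see what the new 'auto' pattern captures, here is a match against an invented filename (note the pattern hardcodes .tif rather than using img_format):

    import re

    auto = re.compile(r'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>.*)C(?P<chanID>.*).tif')
    m = auto.match('plate1_B03_T0001F001L01C02.tif')
    print(m.groupdict())
    # {'plateID': 'plate1', 'wellID': 'B03', 'timeID': '0001',
    #  'fieldID': '001', 'laserID': '01', 'chanID': '02'}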
@@ -3143,7 +3114,7 @@ def _run_test_mode(src, regex, timelapse=False, test_images=10, random_test=True
 
     if os.path.exists(os.path.join(src, 'orig')):
         src = os.path.join(src, 'orig')
-
+
     all_filenames = [filename for filename in os.listdir(src) if regular_expression.match(filename)]
     print(f'Found {len(all_filenames)} files')
     images_by_set = defaultdict(list)
@@ -3185,7 +3156,6 @@ def _choose_model(model_name, device, object_type='cell', restore_type=None, obj
         model_path = os.path.join(current_dir, 'models', 'cp', 'toxo_pv_lumen.CP_model')
         print(model_path)
         model = cp_models.CellposeModel(gpu=torch.cuda.is_available(), model_type=None, pretrained_model=model_path, diam_mean=diameter, device=device)
-        #model = cp_models.Cellpose(gpu=torch.cuda.is_available(), model_type='cyto', device=device)
         print(f'Using Toxoplasma PV lumen model to generate pathogen masks')
         return model
 
@@ -3313,15 +3283,6 @@ class SaliencyMapGenerator:
         return fig
 
     def percentile_normalize(self, img, lower_percentile=2, upper_percentile=98):
-        """
-        Normalize each channel of the image to the given percentiles.
-        Args:
-            img: Input image as numpy array with shape (H, W, C)
-            lower_percentile: Lower percentile for normalization (default 2)
-            upper_percentile: Upper percentile for normalization (default 98)
-        Returns:
-            img: Normalized image
-        """
         img_normalized = np.zeros_like(img)
 
         for c in range(img.shape[2]):  # Iterate over each channel
@@ -3331,7 +3292,6 @@ class SaliencyMapGenerator:
 
         return img_normalized
 
-
 class GradCAMGenerator:
     def __init__(self, model, target_layer, cam_type='gradcam'):
         self.model = model
@@ -3436,15 +3396,6 @@ class GradCAMGenerator:
         return fig
 
     def percentile_normalize(self, img, lower_percentile=2, upper_percentile=98):
-        """
-        Normalize each channel of the image to the given percentiles.
-        Args:
-            img: Input image as numpy array with shape (H, W, C)
-            lower_percentile: Lower percentile for normalization (default 2)
-            upper_percentile: Upper percentile for normalization (default 98)
-        Returns:
-            img: Normalized image
-        """
         img_normalized = np.zeros_like(img)
 
         for c in range(img.shape[2]):  # Iterate over each channel
@@ -4714,12 +4665,12 @@ def process_vision_results(df, threshold=0.5):
     # Split the 'path' column using _map_wells function
     mapped_values = df['path'].apply(lambda x: _map_wells(x))
 
-    df['
-    df['
-    df['
-    df['
+    df['plateID'] = mapped_values.apply(lambda x: x[0])
+    df['rowID'] = mapped_values.apply(lambda x: x[1])
+    df['columnID'] = mapped_values.apply(lambda x: x[2])
+    df['fieldID'] = mapped_values.apply(lambda x: x[3])
     df['object'] = df['path'].str.split('_').str[3].str.split('.').str[0]
-    df['prc'] = df['
+    df['prc'] = df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_' + df['columnID'].astype(str)
     df['cv_predictions'] = (df['pred'] >= threshold).astype(int)
 
     return df
@@ -4732,10 +4683,10 @@ def get_ml_results_paths(src, model_type='xgboost', channel_of_interest=1):
     elif isinstance(channel_of_interest, int):
         feature_string = f"channel_{channel_of_interest}"
 
-    elif channel_of_interest
+    elif channel_of_interest == 'morphology':
         feature_string = 'morphology'
 
-    elif channel_of_interest
+    elif channel_of_interest == None:
         feature_string = 'all_features'
     else:
         raise ValueError(f"Unsupported channel_of_interest: {channel_of_interest}. Supported values are 'int', 'list', 'None', or 'morphology'.")
@@ -5134,24 +5085,24 @@ def fill_holes_in_mask(mask):
 
 def correct_metadata_column_names(df):
     if 'plate_name' in df.columns:
-        df = df.rename(columns={'plate_name': '
+        df = df.rename(columns={'plate_name': 'plateID'})
     if 'column_name' in df.columns:
-        df = df.rename(columns={'column_name': '
+        df = df.rename(columns={'column_name': 'columnID'})
     if 'col' in df.columns:
-        df = df.rename(columns={'col': '
+        df = df.rename(columns={'col': 'columnID'})
     if 'row_name' in df.columns:
-        df = df.rename(columns={'row_name': '
+        df = df.rename(columns={'row_name': 'rowID'})
     if 'grna_name' in df.columns:
         df = df.rename(columns={'grna_name': 'grna'})
     if 'plate_row' in df.columns:
-        df[['
+        df[['plateID', 'rowID']] = df['plate_row'].str.split('_', expand=True)
     return df
 
-def control_filelist(folder, mode='
+def control_filelist(folder, mode='columnID', values=['01','02']):
     files = os.listdir(folder)
-    if mode
+    if mode == 'columnID':
         filtered_files = [file for file in files if file.split('_')[1][1:] in values]
-    if mode
+    if mode == 'rowID':
         filtered_files = [file for file in files if file.split('_')[1][:1] in values]
     return filtered_files
 
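The renaming above standardizes legacy column names onto the plateID/rowID/columnID scheme used throughout 0.5.0; for example (toy frame):

    import pandas as pd

    df = pd.DataFrame({'plate_name': ['p1'], 'row_name': ['r1'], 'col': ['c1']})
    df = correct_metadata_column_names(df)
    print(list(df.columns))  # ['plateID', 'rowID', 'columnID']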
@@ -5169,12 +5120,12 @@ def rename_columns_in_db(db_path):
     columns_info = cursor.fetchall()
     column_names = [col[1] for col in columns_info]
 
-    # Check if columns '
+    # Check if columns 'rowID' or 'columnID' exist
     columns_to_rename = {}
     if 'row' in column_names:
-        columns_to_rename['row'] = '
+        columns_to_rename['row'] = 'rowID'
     if 'col' in column_names:
-        columns_to_rename['col'] = '
+        columns_to_rename['col'] = 'columnID'
 
     # Rename columns if necessary
     if columns_to_rename:
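The rename mechanism itself is outside this hunk; as a rough sketch, the kind of SQLite statement such a rename implies looks like the following (the table and path are hypothetical, and ALTER TABLE ... RENAME COLUMN requires SQLite >= 3.25):

    import sqlite3

    conn = sqlite3.connect('measurements.db')  # hypothetical database path
    cursor = conn.cursor()
    # Rename a legacy column to the new ID scheme.
    cursor.execute('ALTER TABLE png_list RENAME COLUMN "row" TO "rowID"')
    conn.commit()
    conn.close()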
@@ -5418,3 +5369,110 @@ def normalize_src_path(src):
         return src  # Return as a string if not a list
 
     raise ValueError(f"Invalid type for 'src': {type(src).__name__}, expected str or list")
+
+def generate_image_path_map(root_folder, valid_extensions=("tif", "tiff", "png", "jpg", "jpeg", "bmp", "czi", "nd2", "lif")):
+    """
+    Recursively scans a folder and its subfolders for images, then creates a mapping of:
+    {original_image_path: new_image_path}, where the new path includes all subfolder names.
+
+    Args:
+        root_folder (str): The root directory to scan for images.
+        valid_extensions (tuple): Tuple of valid image file extensions.
+
+    Returns:
+        dict: A dictionary mapping original image paths to their new paths.
+    """
+    image_path_map = {}
+
+    for dirpath, _, filenames in os.walk(root_folder):
+        for file in filenames:
+            ext = file.lower().split('.')[-1]
+            if ext in valid_extensions:
+                # Get relative path of the image from root_folder
+                relative_path = os.path.relpath(dirpath, root_folder)
+
+                # Construct new filename: Embed folder hierarchy into the name
+                folder_parts = relative_path.split(os.sep)  # Get all folder names
+                folder_info = "_".join(folder_parts) if folder_parts else ""  # Join with underscores
+
+                # Generate new filename
+                new_filename = f"{folder_info}_{file}" if folder_info else file
+
+                # Store in dictionary (original path -> new path)
+                original_path = os.path.join(dirpath, file)
+                new_path = os.path.join(root_folder, new_filename)
+                image_path_map[original_path] = new_path
+
+    return image_path_map
+
+def copy_images_to_consolidated(image_path_map, root_folder):
+    """
+    Copies images from their original locations to a 'consolidated' folder,
+    renaming them according to the generated dictionary.
+
+    Args:
+        image_path_map (dict): Dictionary mapping {original_path: new_path}.
+        root_folder (str): The root directory where the 'consolidated' folder will be created.
+    """
+    consolidated_folder = os.path.join(root_folder, "consolidated")
+    os.makedirs(consolidated_folder, exist_ok=True)  # Ensure 'consolidated' folder exists
+    files_processed = 0
+    files_to_process = len(image_path_map)
+    time_ls = []
+
+    for original_path, new_path in image_path_map.items():
+
+        start = time.time()
+        new_filename = os.path.basename(new_path)  # Extract only the new filename
+        new_file_path = os.path.join(consolidated_folder, new_filename)  # Place in 'consolidated' folder
+
+        shutil.copy2(original_path, new_file_path)  # Copy file with metadata preserved
+
+        files_processed += 1
+        stop = time.time()
+        duration = (stop - start)
+        time_ls.append(duration)
+
+        print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type=f'Consolidating images')
+        #print(f"Copied: {original_path} -> {new_file_path}")
+
+def correct_metadata(df):
+
+    #if 'object' in df.columns:
+    #    df['objectID'] = df['object']
+
+    if 'object_name' in df.columns:
+        df['objectID'] = df['object_name']
+
+    if 'field_name' in df.columns:
+        df['fieldID'] = df['field_name']
+
+    if 'plate' in df.columns:
+        df['plateID'] = df['plate']
+
+    if 'plate_name' in df.columns:
+        df['plateID'] = df['plate_name']
+
+    if 'row' in df.columns:
+        df = df.rename(columns={'row': 'rowID'})
+
+    if 'row_name' in df.columns:
+        df = df.rename(columns={'row_name': 'rowID'})
+
+    if 'col' in df.columns:
+        df = df.rename(columns={'col': 'columnID'})
+
+    if 'column' in df.columns:
+        df = df.rename(columns={'column': 'columnID'})
+
+    if 'column_name' in df.columns:
+        df = df.rename(columns={'column_name': 'columnID'})
+
+    if 'field' in df.columns:
+        df = df.rename(columns={'field': 'fieldID'})
+
+    if 'field_name' in df.columns:
+        df = df.rename(columns={'field_name': 'fieldID'})
+
+    return df
{spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.4.15
+Version: 0.5.0
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson
@@ -41,6 +41,9 @@ Requires-Dist: pillow<11.0,>=10.2.0
 Requires-Dist: tifffile>=2023.4.12
 Requires-Dist: nd2reader<4.0,>=3.3.0
 Requires-Dist: czifile
+Requires-Dist: pylibCZIrw<6.0,>=5.0.0
+Requires-Dist: aicspylibczi
+Requires-Dist: readlif
 Requires-Dist: imageio<3.0,>=2.34.0
 Requires-Dist: pingouin<1.0,>=0.5.5
 Requires-Dist: umap-learn<1.0,>=0.5.6
@@ -78,7 +81,7 @@ Provides-Extra: headless
 Requires-Dist: opencv-python-headless; extra == "headless"
 
 .. |Documentation Status| image:: https://readthedocs.org/projects/spacr/badge/?version=latest
-   :target: https://
+   :target: https://einarolafsson.github.io/spacr
 .. |PyPI version| image:: https://badge.fury.io/py/spacr.svg
    :target: https://badge.fury.io/py/spacr
 .. |Python version| image:: https://img.shields.io/pypi/pyversions/spacr