spacr 0.2.46__py3-none-any.whl → 0.2.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/core.py +306 -21
- spacr/deep_spacr.py +101 -41
- spacr/gui.py +1 -3
- spacr/gui_core.py +78 -65
- spacr/gui_elements.py +437 -152
- spacr/gui_utils.py +84 -73
- spacr/io.py +14 -7
- spacr/measure.py +196 -145
- spacr/plot.py +2 -42
- spacr/resources/font/open_sans/OFL.txt +93 -0
- spacr/resources/font/open_sans/OpenSans-Italic-VariableFont_wdth,wght.ttf +0 -0
- spacr/resources/font/open_sans/OpenSans-VariableFont_wdth,wght.ttf +0 -0
- spacr/resources/font/open_sans/README.txt +100 -0
- spacr/resources/font/open_sans/static/OpenSans-Bold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-BoldItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-ExtraBold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-ExtraBoldItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-Italic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-Light.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-LightItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-Medium.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-MediumItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-Regular.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-SemiBold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans-SemiBoldItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-Bold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-BoldItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-ExtraBold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-ExtraBoldItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-Italic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-Light.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-LightItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-Medium.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-MediumItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-Regular.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-SemiBold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_Condensed-SemiBoldItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Bold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-BoldItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-ExtraBold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-ExtraBoldItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Italic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Light.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-LightItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Medium.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-MediumItalic.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Regular.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-SemiBold.ttf +0 -0
- spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-SemiBoldItalic.ttf +0 -0
- spacr/sequencing.py +481 -587
- spacr/settings.py +197 -122
- spacr/utils.py +21 -13
- {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/METADATA +7 -4
- spacr-0.2.56.dist-info/RECORD +100 -0
- spacr-0.2.46.dist-info/RECORD +0 -60
- {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/LICENSE +0 -0
- {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/WHEEL +0 -0
- {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/entry_points.txt +0 -0
- {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/top_level.txt +0 -0
spacr/core.py
CHANGED
@@ -877,7 +877,106 @@ def annotate_results(pred_loc):
|
|
877
877
|
display(df)
|
878
878
|
return df
|
879
879
|
|
880
|
-
def generate_dataset(
|
880
|
+
def generate_dataset(settings=None):
    """Bundle PNG paths listed in the measurements database into one tar file.

    Paths are read from the ``png_list`` table of
    ``<src>/measurements/measurements.db``, optionally filtered and
    sub-sampled, split into chunks that worker processes archive into
    temporary tar files, and finally merged into a single tar under
    ``<src>/datasets``.

    Args:
        settings (dict): Configuration. Keys read here:
            ``src`` (required): experiment root folder.
            ``experiment`` (required): label used in the tar file name.
            ``file_metadata`` (optional, str or None): substring that
                ``png_path`` must contain; falsy values select every path.
            ``sample`` (optional, int or None): number of paths to draw at
                random; any non-int value keeps (and shuffles) all paths.

    Returns:
        str or None: Path of the tar file that was written, or ``None`` when
        the database could not be read, ``file_metadata`` has an unsupported
        type, or no image matched (nothing is written in those cases).

    Raises:
        KeyError: If ``src`` or ``experiment`` is missing from ``settings``.
    """
    from contextlib import closing
    from itertools import count

    settings = {} if settings is None else settings
    file_metadata = settings.get('file_metadata')
    sample = settings.get('sample')

    # Fail on missing required keys now rather than after the archiving work.
    src = settings['src']
    experiment = settings['experiment']

    # A truthy non-string filter used to skip the SELECT altogether, which
    # silently produced an empty dataset. Reject it explicitly instead.
    if file_metadata and not isinstance(file_metadata, str):
        print(f"Error: file_metadata must be a string or None, got {type(file_metadata).__name__}")
        return

    db_path = os.path.join(src, 'measurements', 'measurements.db')
    dst = os.path.join(src, 'datasets')
    all_paths = []

    # Connect to the database and retrieve the image paths
    print(f"Reading DataBase: {db_path}")
    try:
        # sqlite3's own context manager only commits/rolls back; closing()
        # makes sure the connection is actually released.
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.cursor()
            if file_metadata:
                cursor.execute("SELECT png_path FROM png_list WHERE png_path LIKE ?", (f"%{file_metadata}%",))
            else:
                cursor.execute("SELECT png_path FROM png_list")

            while True:
                rows = cursor.fetchmany(1000)
                if not rows:
                    break
                all_paths.extend(row[0] for row in rows)

    except sqlite3.Error as e:
        print(f"Database error: {e}")
        return
    except Exception as e:
        print(f"Error: {e}")
        return

    if isinstance(sample, int) and not isinstance(sample, bool):
        # random.sample raises when asked for more items than exist, so cap
        # the request at the number of available paths.
        selected_paths = random.sample(all_paths, min(sample, len(all_paths)))
        print(f"Random selection of {len(selected_paths)} paths")
    else:
        selected_paths = all_paths
        random.shuffle(selected_paths)
        print(f"All paths: {len(selected_paths)} paths")

    total_images = len(selected_paths)
    print(f"Found {total_images} images")

    if total_images == 0:
        # Nothing to archive: do not create folders, workers or an empty tar.
        print("No images selected, dataset not generated")
        return

    # Imported only once the worker pool is really needed.
    from .utils import initiate_counter, add_images_to_tar

    # Create a temp folder in dst
    temp_dir = os.path.join(dst, "temp_tars")
    os.makedirs(temp_dir, exist_ok=True)

    # Chunking the data; never start more workers than there are images so
    # that no worker receives an empty chunk.
    num_procs = min(max(2, cpu_count() - 2), total_images)
    chunk_size, remainder = divmod(total_images, num_procs)

    paths_chunks = []
    start = 0
    for i in range(num_procs):
        # The first `remainder` chunks take one extra path each.
        end = start + chunk_size + (1 if i < remainder else 0)
        paths_chunks.append(selected_paths[start:end])
        start = end

    temp_tar_files = [os.path.join(temp_dir, f"temp_{i}.tar") for i in range(num_procs)]

    # Resolve the output name: <date>_<experiment>[_<file_metadata>].tar
    date_name = datetime.date.today().strftime('%y%m%d')
    base_name = f"{date_name}_{experiment}"
    if file_metadata:
        base_name = f"{base_name}_{file_metadata}"
    tar_name = os.path.join(dst, f"{base_name}.tar")
    if os.path.exists(tar_name):
        # Walk numeric suffixes until a free name is found, so an existing
        # archive is never overwritten.
        for number in count(1):
            candidate = os.path.join(dst, f"{base_name}_{number}.tar")
            if not os.path.exists(candidate):
                break
        print(f"Warning: {os.path.basename(tar_name)} exists, saving as {os.path.basename(candidate)} ")
        tar_name = candidate

    print(f"Generating temporary tar files in {dst}")

    # Initialize shared counter and lock
    counter = Value('i', 0)
    lock = Lock()

    try:
        with Pool(processes=num_procs, initializer=initiate_counter, initargs=(counter, lock)) as pool:
            pool.starmap(add_images_to_tar, [(paths_chunks[i], temp_tar_files[i], total_images) for i in range(num_procs)])

        # Combine the temporary tar files into a final tar
        print(f"Merging temporary files")

        with tarfile.open(tar_name, 'w') as final_tar:
            for temp_tar_path in temp_tar_files:
                with tarfile.open(temp_tar_path, 'r') as temp_tar:
                    for member in temp_tar.getmembers():
                        file_obj = temp_tar.extractfile(member)
                        final_tar.addfile(member, file_obj)
                os.remove(temp_tar_path)
    finally:
        # Delete the temp folder, also when a worker or the merge failed.
        shutil.rmtree(temp_dir, ignore_errors=True)

    print(f"\nSaved {total_images} images to {tar_name}")

    return tar_name
|
978
|
+
|
979
|
+
def generate_dataset_v1(src, file_metadata=None, experiment='TSG101_screen', sample=None):
|
881
980
|
|
882
981
|
from .utils import initiate_counter, add_images_to_tar
|
883
982
|
|
@@ -974,7 +1073,7 @@ def generate_dataset(src, file_metadata=None, experiment='TSG101_screen', sample
|
|
974
1073
|
shutil.rmtree(temp_dir)
|
975
1074
|
print(f"\nSaved {total_images} images to {tar_name}")
|
976
1075
|
|
977
|
-
def
|
1076
|
+
def apply_model_to_tar_v1(tar_path, model_path, file_type='cell_png', image_size=224, batch_size=64, normalize=True, preload='images', n_jobs=10, threshold=0.5, verbose=False):
|
978
1077
|
|
979
1078
|
from .io import TarImageDataset
|
980
1079
|
from .utils import process_vision_results, print_progress
|
@@ -1044,6 +1143,76 @@ def apply_model_to_tar(tar_path, model_path, file_type='cell_png', image_size=22
|
|
1044
1143
|
torch.cuda.memory.empty_cache()
|
1045
1144
|
return df
|
1046
1145
|
|
1146
|
+
def apply_model_to_tar(settings=None):
    """Run a saved model over every image in a tar dataset and save the scores.

    Args:
        settings (dict): Configuration. Keys read here: ``model_path``,
            ``tar_path``, ``image_size``, ``normalize``, ``batch_size``,
            ``n_jobs``, ``score_threshold`` and ``verbose``.

    Returns:
        The DataFrame returned by ``process_vision_results`` for the columns
        ``path`` and ``pred``. The same table is written to
        ``<date>_<dataset>_<model>_result.csv`` next to the tar file.

    Raises:
        KeyError: If one of the required keys is missing from ``settings``.
    """
    from .io import TarImageDataset
    from .utils import process_vision_results, print_progress

    settings = {} if settings is None else settings

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Both variants share ToTensor + CenterCrop; normalisation is optional.
    image_size = settings['image_size']
    transform_steps = [transforms.ToTensor(),
                       transforms.CenterCrop(size=(image_size, image_size))]
    if settings['normalize']:
        transform_steps.append(transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)))
    transform = transforms.Compose(transform_steps)

    if settings['verbose']:
        print(f"Loading model from {settings['model_path']}")
        print(f"Loading dataset from {settings['tar_path']}")

    # map_location lets a model saved on a GPU load on a CPU-only machine.
    # NOTE(review): torch.load unpickles arbitrary objects - only load model
    # files from a trusted source.
    model = torch.load(settings['model_path'], map_location=device)

    dataset = TarImageDataset(settings['tar_path'], transform=transform)
    # Inference gains nothing from shuffling; a fixed order keeps the result
    # table reproducible. Pinned memory only helps host-to-GPU transfers.
    data_loader = DataLoader(dataset,
                             batch_size=settings['batch_size'],
                             shuffle=False,
                             num_workers=settings['n_jobs'],
                             pin_memory=(device.type == 'cuda'))

    model_name = os.path.splitext(os.path.basename(settings['model_path']))[0]
    dataset_name = os.path.splitext(os.path.basename(settings['tar_path']))[0]
    date_name = datetime.date.today().strftime('%y%m%d')
    dst = os.path.dirname(settings['tar_path'])
    # os.path.join keeps the result beside the tar even when tar_path has no
    # directory part (an f-string with '/' would point at the filesystem root).
    result_loc = os.path.join(dst, f'{date_name}_{dataset_name}_{model_name}_result.csv')

    model.eval()
    model = model.to(device)

    if settings['verbose']:
        print(model)
        print(f'Generated dataset with {len(dataset)} images')
        print(f'Generating loader from {len(data_loader)} batches')
        print(f'Results wil be saved in: {result_loc}')
        print(f'Model is in eval mode')
        print(f'Model loaded to device')

    prediction_pos_probs = []
    filenames_list = []
    time_ls = []
    files_to_process = len(dataset)
    gc.collect()
    with torch.no_grad():
        for batch_images, filenames in data_loader:
            start = time.time()
            images = batch_images.to(torch.float).to(device)
            outputs = model(images)
            batch_prediction_pos_prob = torch.sigmoid(outputs).cpu().numpy()
            prediction_pos_probs.extend(batch_prediction_pos_prob.tolist())
            filenames_list.extend(filenames)
            time_ls.append(time.time() - start)
            # Report images done out of images total: the last batch may be
            # short, and len(data_loader) counts batches rather than images.
            print_progress(len(filenames_list), files_to_process, n_jobs=settings['n_jobs'], time_ls=time_ls, batch_size=settings['batch_size'], operation_type="Tar dataset")

    data = {'path': filenames_list, 'pred': prediction_pos_probs}
    df = pd.DataFrame(data, index=None)
    df = process_vision_results(df, settings['score_threshold'])

    df.to_csv(result_loc, index=True, header=True, mode='w')
    if device.type == 'cuda':
        # Release cached GPU memory held by this run.
        torch.cuda.empty_cache()
    return df
|
1215
|
+
|
1047
1216
|
def apply_model(src, model_path, image_size=224, batch_size=64, normalize=True, n_jobs=10):
|
1048
1217
|
|
1049
1218
|
from .io import NoClassDataset
|
@@ -1206,19 +1375,19 @@ def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
|
|
1206
1375
|
for path in train_data:
|
1207
1376
|
start = time.time()
|
1208
1377
|
shutil.copy(path, os.path.join(train_class_dir, os.path.basename(path)))
|
1209
|
-
processed_files += 1
|
1210
1378
|
duration = time.time() - start
|
1211
1379
|
time_ls.append(duration)
|
1212
1380
|
print_progress(processed_files, total_files, n_jobs=1, time_ls=None, batch_size=None, operation_type="Copying files for Train dataset")
|
1381
|
+
processed_files += 1
|
1213
1382
|
|
1214
1383
|
# Copy test files
|
1215
1384
|
for path in test_data:
|
1216
1385
|
start = time.time()
|
1217
1386
|
shutil.copy(path, os.path.join(test_class_dir, os.path.basename(path)))
|
1218
|
-
processed_files += 1
|
1219
1387
|
duration = time.time() - start
|
1220
1388
|
time_ls.append(duration)
|
1221
1389
|
print_progress(processed_files, total_files, n_jobs=1, time_ls=None, batch_size=None, operation_type="Copying files for Test dataset")
|
1390
|
+
processed_files += 1
|
1222
1391
|
|
1223
1392
|
# Print summary
|
1224
1393
|
for cls in classes:
|
@@ -1226,9 +1395,9 @@ def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
|
|
1226
1395
|
test_class_dir = os.path.join(dst, f'test/{cls}')
|
1227
1396
|
print(f'Train class {cls}: {len(os.listdir(train_class_dir))}, Test class {cls}: {len(os.listdir(test_class_dir))}')
|
1228
1397
|
|
1229
|
-
return
|
1398
|
+
return os.path.join(dst, 'train'), os.path.join(dst, 'test')
|
1230
1399
|
|
1231
|
-
def
|
1400
|
+
def generate_training_dataset_v1(src, mode='annotation', annotation_column='test', annotated_classes=[1,2], classes=['nc','pc'], size=200, test_split=0.1, class_metadata=[['c1'],['c2']], metadata_type_by='col', channel_of_interest=3, custom_measurement=None, tables=None, png_type='cell_png'):
|
1232
1401
|
|
1233
1402
|
from .io import _read_and_merge_data, _read_db
|
1234
1403
|
from .utils import get_paths_from_db, annotate_conditions
|
@@ -1329,6 +1498,110 @@ def generate_training_dataset(src, mode='annotation', annotation_column='test',
|
|
1329
1498
|
|
1330
1499
|
return
|
1331
1500
|
|
1501
|
+
def _sample_class_paths(class_paths_ls, size):
    """Randomly draw up to `size` paths from each class; None means the smallest class size."""
    class_paths_ls = [list(paths) for paths in class_paths_ls]
    if size is None:
        size = min((len(paths) for paths in class_paths_ls), default=0)
        print(f'Using the smallest class size: {size}')

    sampled = []
    for paths in class_paths_ls:
        n_draw = min(size, len(paths))
        if n_draw < size:
            # random.sample raises when the request exceeds the population.
            print(f'Warning: requested {size} images but only {len(paths)} are available, using {n_draw}')
        sampled.append(random.sample(paths, n_draw))
    return sampled


def generate_training_dataset(settings):
    """Select image paths per class and copy them into train/test folders.

    The classes are built according to ``settings['dataset_mode']``:

    * ``'annotation'``: paths come from ``training_dataset_from_annotation``
      using ``annotation_column`` and ``annotated_classes``.
    * ``'metadata'``: rows of the ``png_list`` table are assigned to
      ``classes[i]`` when their ``metadata_type_by`` column value is in
      ``class_metadata[i]``.
    * ``'recruitment'``: a ``recruitment`` value is computed per row (from
      ``custom_measurement`` or, by default, pathogen/cytoplasm mean intensity
      of ``channel_of_interest``); rows at or below the 25% quantile form the
      first class and rows at or above the 75% quantile the second.

    From every class ``settings['size']`` paths are drawn at random (the
    smallest class size when ``size`` is None) and passed to
    ``generate_dataset_from_lists`` together with ``classes`` and
    ``test_split``.

    Args:
        settings (dict): Configuration; defaults are filled in by
            ``set_generate_training_dataset_defaults``.

    Returns:
        The value returned by ``generate_dataset_from_lists`` (unpacked here
        as a train directory and a test directory), or ``None`` when
        ``custom_measurement`` is not a list of one or two column names.

    Raises:
        ValueError: If ``dataset_mode`` is not one of the supported modes.
    """
    from itertools import count

    from .io import _read_and_merge_data, _read_db
    from .utils import get_paths_from_db, annotate_conditions
    from .settings import set_generate_training_dataset_defaults

    settings = set_generate_training_dataset_defaults(settings)

    db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
    datasets_dir = os.path.join(settings['src'], 'datasets')
    dst = os.path.join(datasets_dir, 'training')

    if os.path.exists(dst):
        # Never reuse an existing folder: take the first free training_<i>.
        for i in count(1):
            dst = os.path.join(datasets_dir, f'training_{i}')
            if not os.path.exists(dst):
                print(f'Creating new directory for training: {dst}')
                break

    mode = settings['dataset_mode']

    if mode == 'annotation':
        class_paths_ls = training_dataset_from_annotation(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'])

    elif mode == 'metadata':
        class_paths_ls = []
        [df] = _read_db(db_loc=db_path, tables=['png_list'])
        df['metadata_based_class'] = pd.NA
        for i, class_ in enumerate(settings['classes']):
            ls = settings['class_metadata'][i]
            df.loc[df[settings['metadata_type_by']].isin(ls), 'metadata_based_class'] = class_

        for class_ in settings['classes']:
            class_temp_df = df[df['metadata_based_class'] == class_]
            print(f'Found {len(class_temp_df)} images for class {class_}')
            class_paths_ls.append(class_temp_df['png_path'].tolist())

    elif mode == 'recruitment':
        custom_measurement = settings['custom_measurement']
        channel = settings['channel_of_interest']

        # Validate before the (potentially slow) database read.
        if custom_measurement is not None:
            if not isinstance(custom_measurement, list):
                print(f'custom_measurement should be a list, add [ measurement_1, measurement_2 ] or [ measurement ]')
                return
            if len(custom_measurement) not in (1, 2):
                print(f'custom_measurement should contain 1 or 2 measurements, got {len(custom_measurement)}')
                return

        # Fall back to the default tables unless a list was supplied.
        tables = settings['tables']
        if not isinstance(tables, list):
            tables = ['cell', 'nucleus', 'pathogen', 'cytoplasm']

        df, _ = _read_and_merge_data(locs=[db_path],
                                     tables=tables,
                                     verbose=False,
                                     include_multinucleated=True,
                                     include_multiinfected=True,
                                     include_noninfected=True)

        print('length df 1', len(df))

        df = annotate_conditions(df, cells=['HeLa'], cell_loc=None, pathogens=['pathogen'], pathogen_loc=None, treatments=settings['classes'], treatment_loc=settings['class_metadata'], types=settings['metadata_type_by'])
        print('length df 2', len(df))
        [png_list_df] = _read_db(db_loc=db_path, tables=['png_list'])

        if custom_measurement is None:
            print(f"Classes will be defined by the Q1 and Q3 quantiles of recruitment (pathogen/cytoplasm for channel {channel})")
            # Two separate column lookups divided by each other.
            df['recruitment'] = df[f'pathogen_channel_{channel}_mean_intensity'] / df[f'cytoplasm_channel_{channel}_mean_intensity']
        elif len(custom_measurement) == 2:
            print(f"Classes will be defined by the Q1 and Q3 quantiles of recruitment ({custom_measurement[0]}/{custom_measurement[1]})")
            df['recruitment'] = df[f'{custom_measurement[0]}'] / df[f'{custom_measurement[1]}']
        else:
            print(f"Classes will be defined by the Q1 and Q3 quantiles of recruitment ({custom_measurement[0]})")
            df['recruitment'] = df[f'{custom_measurement[0]}']

        q25 = df['recruitment'].quantile(0.25)
        q75 = df['recruitment'].quantile(0.75)
        df_lower = df[df['recruitment'] <= q25]
        df_upper = df[df['recruitment'] >= q75]

        class_paths_lower = get_paths_from_db(df=df_lower, png_df=png_list_df, image_type=settings['png_type'])
        class_paths_upper = get_paths_from_db(df=df_upper, png_df=png_list_df, image_type=settings['png_type'])
        class_paths_ls = [class_paths_lower['png_path'].tolist(),
                          class_paths_upper['png_path'].tolist()]

    else:
        raise ValueError(f"Unknown dataset_mode: {mode!r}, expected 'annotation', 'metadata' or 'recruitment'")

    class_paths_ls = _sample_class_paths(class_paths_ls, settings['size'])

    train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_paths_ls, classes=settings['classes'], test_split=settings['test_split'])

    return train_class_dir, test_class_dir
|
1604
|
+
|
1332
1605
|
def generate_loaders(src, train_mode='erm', mode='train', image_size=224, batch_size=32, classes=['nc','pc'], n_jobs=None, validation_split=0.0, max_show=2, pin_memory=False, normalize=False, channels=[1, 2, 3], augment=False, verbose=False):
|
1333
1606
|
|
1334
1607
|
"""
|
@@ -1729,20 +2002,43 @@ def preprocess_generate_masks(src, settings={}):
|
|
1729
2002
|
|
1730
2003
|
if settings['preprocess']:
|
1731
2004
|
settings, src = preprocess_img_data(settings)
|
1732
|
-
|
2005
|
+
|
2006
|
+
files_to_process = 3
|
2007
|
+
files_processed = 0
|
1733
2008
|
if settings['masks']:
|
1734
2009
|
mask_src = os.path.join(src, 'norm_channel_stack')
|
1735
2010
|
if settings['cell_channel'] != None:
|
2011
|
+
time_ls=[]
|
1736
2012
|
if check_mask_folder(src, 'cell_mask_stack'):
|
2013
|
+
start = time.time()
|
1737
2014
|
generate_cellpose_masks(mask_src, settings, 'cell')
|
2015
|
+
stop = time.time()
|
2016
|
+
duration = (stop - start)
|
2017
|
+
time_ls.append(duration)
|
2018
|
+
files_processed += 1
|
2019
|
+
print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type=f'cell_mask_gen')
|
1738
2020
|
|
1739
2021
|
if settings['nucleus_channel'] != None:
|
2022
|
+
time_ls=[]
|
1740
2023
|
if check_mask_folder(src, 'nucleus_mask_stack'):
|
2024
|
+
start = time.time()
|
1741
2025
|
generate_cellpose_masks(mask_src, settings, 'nucleus')
|
2026
|
+
stop = time.time()
|
2027
|
+
duration = (stop - start)
|
2028
|
+
time_ls.append(duration)
|
2029
|
+
files_processed += 1
|
2030
|
+
print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type=f'nucleus_mask_gen')
|
1742
2031
|
|
1743
2032
|
if settings['pathogen_channel'] != None:
|
2033
|
+
time_ls=[]
|
1744
2034
|
if check_mask_folder(src, 'pathogen_mask_stack'):
|
2035
|
+
start = time.time()
|
1745
2036
|
generate_cellpose_masks(mask_src, settings, 'pathogen')
|
2037
|
+
stop = time.time()
|
2038
|
+
duration = (stop - start)
|
2039
|
+
time_ls.append(duration)
|
2040
|
+
files_processed += 1
|
2041
|
+
print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type=f'pathogen_mask_gen')
|
1746
2042
|
|
1747
2043
|
#if settings['organelle'] != None:
|
1748
2044
|
# if check_mask_folder(src, 'organelle_mask_stack'):
|
@@ -2012,7 +2308,6 @@ def generate_cellpose_masks(src, settings, object_type):
|
|
2012
2308
|
average_sizes = []
|
2013
2309
|
time_ls = []
|
2014
2310
|
|
2015
|
-
files_to_process = len(paths)
|
2016
2311
|
for file_index, path in enumerate(paths):
|
2017
2312
|
name = os.path.basename(path)
|
2018
2313
|
name, ext = os.path.splitext(name)
|
@@ -2050,7 +2345,6 @@ def generate_cellpose_masks(src, settings, object_type):
|
|
2050
2345
|
print(f'Cut batch at indecies: {timelapse_frame_limits}, New batch_size: {batch_size} ')
|
2051
2346
|
|
2052
2347
|
for i in range(0, stack.shape[0], batch_size):
|
2053
|
-
start = time.time()
|
2054
2348
|
mask_stack = []
|
2055
2349
|
if stack.shape[3] == 1:
|
2056
2350
|
batch = stack[i: i+batch_size, :, :, [0,0]].astype(stack.dtype)
|
@@ -2072,14 +2366,6 @@ def generate_cellpose_masks(src, settings, object_type):
|
|
2072
2366
|
save_path = os.path.join(movie_path, f'timelapse_{object_type}_{name}.mp4')
|
2073
2367
|
_npz_to_movie(batch, batch_filenames, save_path, fps=2)
|
2074
2368
|
|
2075
|
-
stop = time.time()
|
2076
|
-
duration = (stop - start)
|
2077
|
-
time_ls.append(duration)
|
2078
|
-
files_processed = (file_index+1)*len(batch_filenames)
|
2079
|
-
files_processed = len(paths)*batch.shape[0]
|
2080
|
-
print('file_index', file_index, 'len(paths)', len(paths), 'batch.shape[0]', batch.shape[0])
|
2081
|
-
print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=batch.shape[0], operation_type=f'{object_type}_mask_gen')
|
2082
|
-
|
2083
2369
|
output = model.eval(x=batch,
|
2084
2370
|
batch_size=cellpose_batch_size,
|
2085
2371
|
normalize=False,
|
@@ -2484,7 +2770,6 @@ def ml_analysis(df, channel_of_interest=3, location_column='col', positive_contr
|
|
2484
2770
|
df_metadata = df[[location_column]].copy()
|
2485
2771
|
df, features = filter_dataframe_features(df, channel_of_interest, exclude, remove_low_variance_features, remove_highly_correlated_features, verbose)
|
2486
2772
|
|
2487
|
-
|
2488
2773
|
if verbose:
|
2489
2774
|
print(f'Found {len(features)} numerical features in the dataframe')
|
2490
2775
|
print(f'Features used in training: {features}')
|
@@ -2629,7 +2914,6 @@ def check_index(df, elements=5, split_char='_'):
|
|
2629
2914
|
print(idx)
|
2630
2915
|
raise ValueError(f"Found {len(problematic_indices)} problematic indices that do not split into {elements} parts.")
|
2631
2916
|
|
2632
|
-
#def plate_heatmap(src, model_type='xgboost', variable='predictions', grouping='mean', min_max='allq', cmap='viridis', channel_of_interest=3, min_count=25, n_estimators=100, col_to_compare='col', pos='c2', neg='c1', exclude=None, n_repeats=10, clean=True, nr_to_plot=20, verbose=False, n_jobs=-1):
|
2633
2917
|
def generate_ml_scores(src, settings):
|
2634
2918
|
|
2635
2919
|
from .io import _read_and_merge_data
|
@@ -2667,7 +2951,7 @@ def generate_ml_scores(src, settings):
|
|
2667
2951
|
settings['top_features'],
|
2668
2952
|
settings['n_estimators'],
|
2669
2953
|
settings['test_size'],
|
2670
|
-
settings['
|
2954
|
+
settings['model_type_ml'],
|
2671
2955
|
settings['n_jobs'],
|
2672
2956
|
settings['remove_low_variance_features'],
|
2673
2957
|
settings['remove_highly_correlated_features'],
|
@@ -2688,7 +2972,7 @@ def generate_ml_scores(src, settings):
|
|
2688
2972
|
min_count=settings['minimum_cell_count'],
|
2689
2973
|
verbose=settings['verbose'])
|
2690
2974
|
|
2691
|
-
data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv = get_ml_results_paths(src, settings['
|
2975
|
+
data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv = get_ml_results_paths(src, settings['model_type_ml'], settings['channel_of_interest'])
|
2692
2976
|
df, permutation_df, feature_importance_df, _, _, _, _, _, metrics_df = output
|
2693
2977
|
|
2694
2978
|
settings_df.to_csv(settings_csv, index=False)
|
@@ -2845,6 +3129,7 @@ def generate_image_umap(settings={}):
|
|
2845
3129
|
settings['plot_outlines'] = False
|
2846
3130
|
settings['smooth_lines'] = False
|
2847
3131
|
|
3132
|
+
print(f'Generating Image UMAP ...')
|
2848
3133
|
settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
|
2849
3134
|
settings_dir = os.path.join(settings['src'][0],'settings')
|
2850
3135
|
settings_csv = os.path.join(settings_dir,'embedding_settings.csv')
|