spacr 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +19 -3
- spacr/cellpose.py +311 -0
- spacr/core.py +142 -2495
- spacr/deep_spacr.py +151 -29
- spacr/gui.py +1 -0
- spacr/gui_core.py +74 -63
- spacr/gui_elements.py +110 -5
- spacr/gui_utils.py +346 -6
- spacr/io.py +631 -51
- spacr/logger.py +28 -9
- spacr/measure.py +107 -95
- spacr/mediar.py +0 -5
- spacr/ml.py +964 -0
- spacr/openai.py +37 -0
- spacr/plot.py +281 -16
- spacr/resources/data/lopit.csv +3833 -0
- spacr/resources/data/toxoplasma_metadata.csv +8843 -0
- spacr/resources/icons/convert.png +0 -0
- spacr/resources/{models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model → icons/dna_matrix.mp4} +0 -0
- spacr/sequencing.py +241 -1311
- spacr/settings.py +129 -43
- spacr/sim.py +0 -2
- spacr/submodules.py +348 -0
- spacr/timelapse.py +0 -2
- spacr/toxo.py +233 -0
- spacr/utils.py +275 -173
- {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/METADATA +7 -1
- {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/RECORD +32 -33
- spacr/chris.py +0 -50
- spacr/graph_learning.py +0 -340
- spacr/resources/MEDIAR/.git +0 -1
- spacr/resources/MEDIAR_weights/.DS_Store +0 -0
- spacr/resources/icons/.DS_Store +0 -0
- spacr/resources/icons/spacr_logo_rotation.gif +0 -0
- spacr/resources/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv +0 -23
- spacr/resources/models/cp/toxo_pv_lumen.CP_model +0 -0
- spacr/sim_app.py +0 -0
- {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/LICENSE +0 -0
- {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/WHEEL +0 -0
- {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/top_level.txt +0 -0
spacr/utils.py
CHANGED
```diff
@@ -1,6 +1,7 @@
-import
+import os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess, time, requests
 
 import numpy as np
+import pandas as pd
 from cellpose import models as cp_models
 from cellpose import denoise
 
@@ -14,7 +15,6 @@ from skimage.segmentation import clear_border
 
 from collections import defaultdict, OrderedDict
 from PIL import Image
-import pandas as pd
 from statsmodels.stats.outliers_influence import variance_inflation_factor
 from statsmodels.stats.stattools import durbin_watson
 import statsmodels.formula.api as smf
@@ -24,7 +24,7 @@ from itertools import combinations
 from functools import reduce
 from IPython.display import display
 
-from multiprocessing import Pool, cpu_count
+from multiprocessing import Pool, cpu_count, set_start_method, get_start_method
 from concurrent.futures import ThreadPoolExecutor
 
 import torch.nn as nn
```
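The `multiprocessing` import now also pulls in `set_start_method` and `get_start_method`, which were previously imported in a second statement further down (removed in a later hunk). The hunk shows only the import; below is a minimal sketch of the guard these functions typically support in torch-heavy code — the actual call site is not visible in this diff, so treat it as illustrative:

```python
from multiprocessing import get_start_method, set_start_method

# CUDA tensors cannot be shared across "fork"-started workers, so code that
# mixes torch and multiprocessing usually forces the "spawn" start method once:
if get_start_method(allow_none=True) != "spawn":
    set_start_method("spawn", force=True)
```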
```diff
@@ -33,63 +33,44 @@ from torch.utils.checkpoint import checkpoint
 from torch.utils.data import Subset
 from torch.autograd import grad
 
+from torchvision import models
+from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights
+import torchvision.transforms as transforms
+from torchvision.models import resnet50
+
 import seaborn as sns
 import matplotlib.pyplot as plt
 from matplotlib.offsetbox import OffsetImage, AnnotationBbox
 
+from scipy import stats
 import scipy.ndimage as ndi
 from scipy.spatial import distance
-from scipy.stats import fisher_exact
+from scipy.stats import fisher_exact, f_oneway, kruskal
 from scipy.ndimage.filters import gaussian_filter
 from scipy.spatial import ConvexHull
 from scipy.interpolate import splprep, splev
 from scipy.ndimage import binary_dilation
 
-from sklearn.preprocessing import StandardScaler
 from skimage.exposure import rescale_intensity
 from sklearn.metrics import auc, precision_recall_curve
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import Lasso, Ridge
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.cluster import KMeans
-from sklearn.preprocessing import StandardScaler
-from sklearn.cluster import DBSCAN
-from sklearn.cluster import KMeans
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from sklearn.cluster import KMeans, DBSCAN
 from sklearn.manifold import TSNE
-from sklearn.cluster import KMeans
 from sklearn.decomposition import PCA
-
-import umap.umap_ as umap
-
-from torchvision import models
-from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights
-import torchvision.transforms as transforms
-
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.preprocessing import StandardScaler
-from scipy.stats import f_oneway, kruskal
-from sklearn.cluster import KMeans
-from scipy import stats
 
-from
-from multiprocessing import set_start_method, get_start_method
+from huggingface_hub import list_repo_files
 
-import
-import
-from torchvision.models import resnet50
-from PIL import Image
-import numpy as np
-import umap
-import pandas as pd
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.preprocessing import StandardScaler
-from scipy.stats import f_oneway, kruskal
-from sklearn.cluster import KMeans
-from scipy import stats
+import umap.umap_ as umap
+#import umap
 
-def save_settings(settings, name='settings'):
+def save_settings(settings, name='settings', show=False):
 
     settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
+    if show:
+        display(settings_df)
     settings_csv = os.path.join(settings['src'],'settings',f'{name}.csv')
     os.makedirs(os.path.join(settings['src'],'settings'), exist_ok=True)
     settings_df.to_csv(settings_csv, index=False)
```
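`save_settings` gains a `show` flag that displays the settings table before writing it out. A usage sketch (the keys other than 'src' are illustrative, not part of the function's contract):

```python
settings = {'src': '/path/to/experiment', 'channels': [0, 1, 2]}

# Displays the Key/Value table, then writes /path/to/experiment/settings/preprocess.csv
save_settings(settings, name='preprocess', show=True)
```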
```diff
@@ -303,7 +284,10 @@ def _get_cellpose_batch_size():
     except Exception as e:
         return 8
 
-def
+def _extract_filename_metadata_v1(filenames, src, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):
+
+    images_by_key = defaultdict(list)
+
     for filename in filenames:
         match = regular_expression.match(filename)
         if match:
@@ -328,7 +312,6 @@ def _extract_filename_metadata(filenames, src, images_by_key, regular_expression
                 if metadata_type =='cq1':
                     orig_wellID = wellID
                     wellID = _convert_cq1_well_id(wellID)
-                    #clear_output(wait=True)
                     print(f'Converted Well ID: {orig_wellID} to {wellID}', end='\r', flush=True)
 
                 if pick_slice:
@@ -338,7 +321,7 @@ def _extract_filename_metadata(filenames, src, images_by_key, regular_expression
                         sliceid = '00'
 
                     if mode == skip_mode:
-                        continue
+                        continue
 
                 key = (plate, well, field, channel, mode)
                 with Image.open(os.path.join(src, filename)) as img:
@@ -351,6 +334,57 @@ def _extract_filename_metadata(filenames, src, images_by_key, regular_expression
 
     return images_by_key
 
+def _extract_filename_metadata(filenames, src, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):
+
+    images_by_key = defaultdict(list)
+
+    for filename in filenames:
+        match = regular_expression.match(filename)
+        if match:
+            try:
+                try:
+                    plate = match.group('plateID')
+                except:
+                    plate = os.path.basename(src)
+
+                well = match.group('wellID')
+                field = match.group('fieldID')
+                channel = match.group('chanID')
+                mode = None
+
+                if well[0].isdigit():
+                    well = str(_safe_int_convert(well))
+                if field[0].isdigit():
+                    field = str(_safe_int_convert(field))
+                if channel[0].isdigit():
+                    channel = str(_safe_int_convert(channel))
+
+                if metadata_type =='cq1':
+                    orig_wellID = wellID
+                    wellID = _convert_cq1_well_id(wellID)
+                    print(f'Converted Well ID: {orig_wellID} to {wellID}', end='\r', flush=True)
+
+                if pick_slice:
+                    try:
+                        mode = match.group('AID')
+                    except IndexError:
+                        sliceid = '00'
+
+                    if mode == skip_mode:
+                        continue
+
+                key = (plate, well, field, channel, mode)
+                file_path = os.path.join(src, filename)  # Store the full path
+                images_by_key[key].append(file_path)
+
+            except IndexError:
+                print(f"Could not extract information from filename {filename} using provided regex")
+        else:
+            print(f"Filename {filename} did not match provided regex")
+            continue
+
+    return images_by_key
+
 def mask_object_count(mask):
     """
     Counts the number of objects in a given mask.
```
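The rewritten `_extract_filename_metadata` builds `images_by_key` internally (instead of taking it as a parameter) and stores full file paths rather than opened images; the old version is kept as `_extract_filename_metadata_v1`. A usage sketch with a hypothetical CellVoyager-style pattern — the named groups are the ones the function's `match.group(...)` calls expect:

```python
import re

# Hypothetical filename layout: plate1_A01_F001_C01.tif
pattern = re.compile(r'(?P<plateID>.+)_(?P<wellID>[A-Z]\d{2})_F(?P<fieldID>\d+)_C(?P<chanID>\d+)\.tif')

filenames = ['plate1_A01_F001_C01.tif', 'plate1_A01_F001_C02.tif', 'notes.txt']
images_by_key = _extract_filename_metadata(filenames, src='/data/plate1', regular_expression=pattern)
# Keys are (plate, well, field, channel, mode) tuples; values are lists of full paths.
# 'notes.txt' does not match the regex and is reported rather than collected.
```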
```diff
@@ -441,7 +475,7 @@ def _generate_representative_images(db_path, cells=['HeLa'], cell_loc=None, path
     from .plot import _plot_images_on_grid
 
     df = _read_and_join_tables(db_path)
-    df =
+    df = annotate_conditions(df, cells, cell_loc, pathogens, pathogen_loc, treatments, treatment_loc)
 
     if update_db:
         _update_database_with_merged_info(db_path, df, table='png_list', columns=['pathogen', 'treatment', 'host_cells', 'condition', 'prcfo'])
@@ -487,34 +521,6 @@ def _map_values(row, values, locs):
         return value_dict.get(row[type_], None)
     return values[0] if values else None
 
-def _annotate_conditions(df, cells=['HeLa'], cell_loc=None, pathogens=['rh'], pathogen_loc=None, treatments=['cm'], treatment_loc=None):
-    """
-    Annotates conditions in the given DataFrame based on the provided parameters.
-
-    Args:
-        df (pandas.DataFrame): The DataFrame to annotate.
-        cells (list, optional): The list of host cell types. Defaults to ['HeLa'].
-        cell_loc (list, optional): The list of location identifiers for host cells. Defaults to None.
-        pathogens (list, optional): The list of pathogens. Defaults to ['rh'].
-        pathogen_loc (list, optional): The list of location identifiers for pathogens. Defaults to None.
-        treatments (list, optional): The list of treatments. Defaults to ['cm'].
-        treatment_loc (list, optional): The list of location identifiers for treatments. Defaults to None.
-
-    Returns:
-        pandas.DataFrame: The annotated DataFrame with the 'host_cells', 'pathogen', 'treatment', and 'condition' columns.
-    """
-
-
-    # Apply mappings or defaults
-    df['host_cells'] = [cells[0]] * len(df) if cell_loc is None else df.apply(_map_values, args=(cells, cell_loc), axis=1)
-    df['pathogen'] = [pathogens[0]] * len(df) if pathogen_loc is None else df.apply(_map_values, args=(pathogens, pathogen_loc), axis=1)
-    df['treatment'] = [treatments[0]] * len(df) if treatment_loc is None else df.apply(_map_values, args=(treatments, treatment_loc), axis=1)
-
-    # Construct condition column
-    df['condition'] = df.apply(lambda row: '_'.join(filter(None, [row.get('pathogen'), row.get('treatment')])), axis=1)
-    df['condition'] = df['condition'].apply(lambda x: x if x else 'none')
-    return df
-
 def is_list_of_lists(var):
     if isinstance(var, list) and all(isinstance(i, list) for i in var):
         return True
@@ -1083,67 +1089,74 @@ def _get_cellpose_channels(src, nucleus_channel, pathogen_channel, cell_channel)
     else:
         cellpose_channels['cell'] = [0,0]
     return cellpose_channels
-
-def annotate_conditions(df, cells=
+
+def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_loc=None, treatments=None, treatment_loc=None):
     """
-    Annotates conditions in a DataFrame based on specified criteria.
+    Annotates conditions in a DataFrame based on specified criteria and combines them into a 'condition' column.
+    NaN is used for missing values, and they are excluded from the 'condition' column.
 
     Args:
         df (pandas.DataFrame): The DataFrame to annotate.
-        cells (list, optional):
-        cell_loc (list, optional):
-        pathogens (list, optional):
-        pathogen_loc (list, optional):
-        treatments (list, optional):
-        treatment_loc (list, optional):
-        types (list, optional): List of column types for host cells, pathogens, and treatments. Defaults to ['col','col','col'].
+        cells (list/str, optional): Host cell types. Defaults to None.
+        cell_loc (list of lists, optional): Values for each host cell type. Defaults to None.
+        pathogens (list/str, optional): Pathogens. Defaults to None.
+        pathogen_loc (list of lists, optional): Values for each pathogen. Defaults to None.
+        treatments (list/str, optional): Treatments. Defaults to None.
+        treatment_loc (list of lists, optional): Values for each treatment. Defaults to None.
 
     Returns:
-        pandas.DataFrame:
+        pandas.DataFrame: Annotated DataFrame with a combined 'condition' column.
     """
+
+    def _get_type(val):
+        """Determine if a value maps to 'row' or 'col'."""
+        if isinstance(val, str) and val.startswith('c'):
+            return 'col'
+        elif isinstance(val, str) and val.startswith('r'):
+            return 'row'
+        return None
 
-
-    def _map_values(row, dict_, type_='col'):
+    def _map_or_default(column_name, values, loc, df):
         """
-
+        Consolidates the logic for mapping values or assigning defaults when loc is None.
 
         Args:
-
-
-
-
-        Returns:
-            str: The mapped value if found, otherwise None.
+            column_name (str): The column in the DataFrame to annotate.
+            values (list/str): The list of values or a single string to annotate.
+            loc (list of lists): Location mapping for the values, or None if not used.
+            df (pandas.DataFrame): The DataFrame to modify.
         """
-
-
-
-
+        if isinstance(values, str) or (isinstance(values, list) and loc is None):
+            # Assign all rows the first value in the list or the single string
+            df[column_name] = values if isinstance(values, str) else values[0]
+        elif values is not None and loc is not None:
+            # Perform the location-based mapping
+            value_dict = {val: key for key, loc_list in zip(values, loc) for val in loc_list}
+            df[column_name] = np.nan
+            for val, key in value_dict.items():
+                loc_type = _get_type(val)
+                if loc_type:
+                    df.loc[df[loc_type] == val, column_name] = key
+
+    # Handle cells, pathogens, and treatments using the consolidated logic
+    _map_or_default('host_cells', cells, cell_loc, df)
+    _map_or_default('pathogen', pathogens, pathogen_loc, df)
+    _map_or_default('treatment', treatments, treatment_loc, df)
+
+    # Conditionally fill NaN for pathogen and treatment columns if applicable
+    if pathogens is not None:
+        df['pathogen'].fillna(np.nan, inplace=True)
+    if treatments is not None:
+        df['treatment'].fillna(np.nan, inplace=True)
+
+    # Create the 'condition' column by excluding any NaN values, safely checking if 'host_cells', 'pathogen', and 'treatment' exist
+    df['condition'] = df.apply(
+        lambda x: '_'.join([str(v) for v in [x.get('host_cells'), x.get('pathogen'), x.get('treatment')] if pd.notna(v)]),
+        axis=1
+    )
 
-    if cell_loc is None:
-        df['host_cells'] = cells[0]
-    else:
-        cells_dict = dict(zip(cells, cell_loc))
-        df['host_cells'] = df.apply(lambda row: _map_values(row, cells_dict, type_=types[0]), axis=1)
-    if pathogen_loc is None:
-        if pathogens != None:
-            df['pathogen'] = 'none'
-    else:
-        pathogens_dict = dict(zip(pathogens, pathogen_loc))
-        df['pathogen'] = df.apply(lambda row: _map_values(row, pathogens_dict, type_=types[1]), axis=1)
-    if treatment_loc is None:
-        df['treatment'] = 'cm'
-    else:
-        treatments_dict = dict(zip(treatments, treatment_loc))
-        df['treatment'] = df.apply(lambda row: _map_values(row, treatments_dict, type_=types[2]), axis=1)
-    if pathogens != None:
-        df['condition'] = df['pathogen']+'_'+df['treatment']
-    else:
-        df['condition'] = df['treatment']
     return df
-
 
-
 def _split_data(df, group_by, object_type):
     """
     Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
```
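The rewritten `annotate_conditions` drops the old `types` parameter: whether a location label maps to the 'row' or 'col' column is now inferred from its 'r'/'c' prefix by `_get_type`. A usage sketch (strain and treatment names are illustrative; the DataFrame is assumed to carry 'row' and 'col' columns such as 'r1'..'r16' and 'c1'..'c24'):

```python
df = annotate_conditions(
    df,
    cells='HeLa',                               # single string: applied to every row
    pathogens=['rh', 'dgra14'],
    pathogen_loc=[['c1', 'c2'], ['c3', 'c4']],  # one list of plate columns per pathogen
    treatments=['dmso', 'drug'],
    treatment_loc=[['r1'], ['r2']],             # one list of plate rows per treatment
)
# df['condition'] becomes e.g. 'HeLa_rh_dmso'; unmapped wells keep NaN, which is
# skipped when the parts are joined.
```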
```diff
@@ -1949,9 +1962,10 @@ def add_images_to_tar(paths_chunk, tar_path, total_images):
             tar.add(img_path, arcname=arcname)
             with lock:
                 counter.value += 1
-                if counter.value %
-                    progress = (counter.value / total_images) * 100
-                    print(f"Progress: {counter.value}/{total_images} ({progress:.2f}%)", end='\r', file=sys.stdout, flush=True)
+                if counter.value % 10 == 0:  # Print every 100 updates
+                    #progress = (counter.value / total_images) * 100
+                    #print(f"Progress: {counter.value}/{total_images} ({progress:.2f}%)", end='\r', file=sys.stdout, flush=True)
+                    print_progress(counter.value, total_images, n_jobs=1, time_ls=None, batch_size=None, operation_type="generating .tar dataset")
         except FileNotFoundError:
             print(f"File not found: {img_path}")
 
```
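The inline progress print is replaced with spacr's shared `print_progress` helper. A minimal stand-in with the same call shape used above, for readers following along outside the package (the real helper lives elsewhere in spacr and also tracks timing and batch statistics):

```python
def print_progress(value, total, n_jobs=1, time_ls=None, batch_size=None, operation_type=""):
    # Single-line progress readout, overwritten in place on each update.
    pct = (value / total) * 100 if total else 0.0
    print(f"{operation_type}: {value}/{total} ({pct:.2f}%)", end='\r', flush=True)
```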
```diff
@@ -2068,52 +2082,6 @@ def check_multicollinearity(x):
     vif_data["VIF"] = [variance_inflation_factor(x.values, i) for i in range(x.shape[1])]
     return vif_data
 
-def generate_dependent_variable(df, dv_loc, pc_min=0.95, nc_max=0.05, agg_type='mean'):
-
-    from .plot import _plot_histograms_and_stats, _plot_plates
-
-    def qstring_to_float(qstr):
-        number = int(qstr[1:])  # Remove the "q" and convert the rest to an integer
-        return number / 100.0
-
-    print("Unique values in plate:", df['plate'].unique())
-    dv_cell_loc = f'{dv_loc}/dv_cell.csv'
-    dv_well_loc = f'{dv_loc}/dv_well.csv'
-
-    df['pred'] = 1-df['pred'] #if you swiched pc and nc
-    df = df[(df['pred'] <= nc_max) | (df['pred'] >= pc_min)]
-
-    if 'prc' not in df.columns:
-        df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['col']
-
-    if agg_type.startswith('q'):
-        val = qstring_to_float(agg_type)
-        agg_type = lambda x: x.quantile(val)
-
-    # Aggregating for mean prediction and total count
-    df_grouped = df.groupby('prc').agg(
-        pred=('pred', agg_type),
-        recruitment=('recruitment', agg_type),
-        count_prc=('prc', 'size'),
-        #count_above_95=('pred', lambda x: (x > 0.95).sum()),
-        mean_pathogen_area=('pathogen_area', 'mean')
-    )
-
-    df_cell = df[['prc', 'pred', 'pathogen_area', 'recruitment']]
-
-    df_cell.to_csv(dv_cell_loc, index=True, header=True, mode='w')
-    df_grouped.to_csv(dv_well_loc, index=True, header=True, mode='w') # Changed from loc to dv_loc
-    display(df)
-    _plot_histograms_and_stats(df)
-    df_grouped = df_grouped.sort_values(by='count_prc', ascending=True)
-    display(df_grouped)
-    print('pred')
-    _plot_plates(df=df_cell, variable='pred', grouping='mean', min_max='allq', cmap='viridis')
-    print('recruitment')
-    _plot_plates(df=df_cell, variable='recruitment', grouping='mean', min_max='allq', cmap='viridis')
-
-    return df_grouped
-
 def lasso_reg(merged_df, alpha_value=0.01, reg_type='lasso'):
     # Separate predictors and response
     X = merged_df[['gene', 'grna', 'plate', 'row', 'column']]
@@ -3592,13 +3560,48 @@ def plot_grid(cluster_images, colors, figuresize, black_background, verbose):
     plt.show()
     return grid_fig
 
-def
+def generate_path_list_from_db(db_path, file_metadata):
+
+    all_paths = []
+
+    # Connect to the database and retrieve the image paths
+    print(f"Reading DataBase: {db_path}")
+    try:
+        with sqlite3.connect(db_path) as conn:
+            cursor = conn.cursor()
+            if file_metadata:
+                if isinstance(file_metadata, str):
+                    cursor.execute("SELECT png_path FROM png_list WHERE png_path LIKE ?", (f"%{file_metadata}%",))
+            else:
+                cursor.execute("SELECT png_path FROM png_list")
 
-
-
-
+            while True:
+                rows = cursor.fetchmany(1000)
+                if not rows:
+                    break
+                all_paths.extend([row[0] for row in rows])
+
+    except sqlite3.Error as e:
+        print(f"Database error: {e}")
+        return
+    except Exception as e:
+        print(f"Error: {e}")
+        return
 
-
+    return all_paths
+
+def correct_paths(df, base_path):
+
+    if isinstance(df, pd.DataFrame):
+
+        if 'png_path' not in df.columns:
+            print("No 'png_path' column found in the dataframe.")
+            return df, None
+        else:
+            image_paths = df['png_path'].to_list()
+
+    elif isinstance(df, list):
+        image_paths = df
 
     adjusted_image_paths = []
     for path in image_paths:
```
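Usage sketch for the two new helpers: pull the PNG paths recorded in a spacr SQLite database, optionally filtered by a substring, then rebase them onto the current machine's data root (the .db path below is illustrative):

```python
paths = generate_path_list_from_db('/data/plate1/measurements/measurements.db',
                                   file_metadata='cell_png')  # substring filter; pass None for all rows
paths = correct_paths(paths, base_path='/data/plate1')        # list in -> list out
```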
```diff
@@ -3612,9 +3615,11 @@ def correct_paths(df, base_path):
         else:
             adjusted_image_paths.append(path)
 
-    df
-
-
+    if isinstance(df, pd.DataFrame):
+        df['png_path'] = adjusted_image_paths
+        return df, adjusted_image_paths
+    else:
+        return adjusted_image_paths
 
 def delete_folder(folder_path):
     if os.path.exists(folder_path) and os.path.isdir(folder_path):
```
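With this change `correct_paths` has two return shapes, shown in a short sketch (the base path is illustrative; the 'png_path' column name comes from the function itself):

```python
df, image_paths = correct_paths(df, base_path='/data/plate1')     # DataFrame in -> (DataFrame, list) out
image_paths = correct_paths(path_list, base_path='/data/plate1')  # list in -> list out
```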
```diff
@@ -4422,7 +4427,7 @@ def convert_and_relabel_masks(folder_path):
 
 def correct_masks(src):
 
-    from .
+    from .io import _load_and_concatenate_arrays
 
     cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
     convert_and_relabel_masks(cell_path)
@@ -4445,4 +4450,101 @@ def get_cuda_version():
     except (subprocess.CalledProcessError, FileNotFoundError):
         return None
 
+def all_elements_match(list1, list2):
+    # Check if all elements in list1 are in list2
+    return all(element in list2 for element in list1)
+
+def prepare_batch_for_segmentation(batch):
+    # Ensure the batch is of dtype float32
+    if batch.dtype != np.float32:
+        batch = batch.astype(np.float32)
+
+    # Normalize each image in the batch
+    for i in range(batch.shape[0]):
+        if batch[i].max() > 1:
+            batch[i] = batch[i] / batch[i].max()
+
+    return batch
+
+def check_index(df, elements=5, split_char='_'):
+    problematic_indices = []
+    for idx in df.index:
+        parts = str(idx).split(split_char)
+        if len(parts) != elements:
+            problematic_indices.append(idx)
+    if problematic_indices:
+        print("Indices that cannot be separated into 5 parts:")
+        for idx in problematic_indices:
+            print(idx)
+        raise ValueError(f"Found {len(problematic_indices)} problematic indices that do not split into {elements} parts.")
+
+# Define the mapping function
+def map_condition(col_value, neg='c1', pos='c2', mix='c3'):
+    if col_value == neg:
+        return 'neg'
+    elif col_value == pos:
+        return 'pos'
+    elif col_value == mix:
+        return 'mix'
+    else:
+        return 'screen'
+
+def download_models(repo_id="einarolafsson/models", local_dir=None, retries=5, delay=5):
+    """
+    Downloads all model files from Hugging Face and stores them in the specified local directory.
+
+    Args:
+        repo_id (str): The repository ID on Hugging Face (default is 'einarolafsson/models').
+        local_dir (str): The local directory where models will be saved. Defaults to '/home/carruthers/Desktop/test'.
+        retries (int): Number of retry attempts in case of failure.
+        delay (int): Delay in seconds between retries.
 
+    Returns:
+        str: The local path to the downloaded models.
+    """
+    # Create the local directory if it doesn't exist
+    if not os.path.exists(local_dir):
+        os.makedirs(local_dir)
+    elif len(os.listdir(local_dir)) > 0:
+        print(f"Models already downloaded to: {local_dir}")
+        return local_dir
+
+    attempt = 0
+    while attempt < retries:
+        try:
+            # List all files in the repo
+            files = list_repo_files(repo_id, repo_type="dataset")
+            print(f"Files in repository: {files}")  # Debugging print to check file list
+
+            # Download each file
+            for file_name in files:
+                for download_attempt in range(retries):
+                    try:
+                        url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_name}?download=true"
+                        print(f"Downloading file from: {url}")  # Debugging
+
+                        response = requests.get(url, stream=True)
+                        print(f"HTTP response status: {response.status_code}")  # Debugging
+                        response.raise_for_status()
+
+                        # Save the file locally
+                        local_file_path = os.path.join(local_dir, os.path.basename(file_name))
+                        with open(local_file_path, 'wb') as file:
+                            for chunk in response.iter_content(chunk_size=8192):
+                                file.write(chunk)
+                        print(f"Downloaded model file: {file_name} to {local_file_path}")
+                        break  # Exit the retry loop if successful
+                    except (requests.HTTPError, requests.Timeout) as e:
+                        print(f"Error downloading {file_name}: {e}. Retrying in {delay} seconds...")
+                        time.sleep(delay)
+                else:
+                    raise Exception(f"Failed to download {file_name} after multiple attempts.")
+
+            return local_dir  # Return the directory where models are saved
+
+        except (requests.HTTPError, requests.Timeout) as e:
+            print(f"Error downloading files: {e}. Retrying in {delay} seconds...")
+            attempt += 1
+            time.sleep(delay)
+
+    raise Exception("Failed to download model files after multiple attempts.")
```
{spacr-0.3.0.dist-info → spacr-0.3.2.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.
+Version: 0.3.2
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson
@@ -8,6 +8,7 @@ Author-email: olafsson@med.umich.com
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
+Description-Content-Type: text/x-rst
 License-File: LICENSE
 Requires-Dist: torch<3.0,>=2.0
 Requires-Dist: torchvision<1.0,>=0.1
@@ -58,6 +59,10 @@ Requires-Dist: segmentation-models-pytorch>=0.3.3
 Requires-Dist: tifffile>=2023.4.12
 Requires-Dist: tqdm>=4.65.0
 Requires-Dist: wandb>=0.16.2
+Requires-Dist: openai<2.0,>=1.50.2
+Requires-Dist: nd2reader<4.0,>=3.3.0
+Requires-Dist: czifile
+Requires-Dist: adjustText<2.0,>=1.2.0
 Requires-Dist: huggingface-hub<0.25,>=0.24.0
 Provides-Extra: dev
 Requires-Dist: pytest<3.11,>=3.9; extra == "dev"
@@ -115,6 +120,7 @@ If using Windows, switch to Linux—it's free, open-source, and better.
 Before installing SpaCr on OSX ensure OpenMP is installed::
 
     brew install libomp
+    brew install hdf5
 
 SpaCr GUI requires Tkinter. On Linux, ensure Tkinter is installed. (Tkinter is included with the standard Python installation on macOS and Windows)::
 
```