spacr 0.0.70__py3-none-any.whl → 0.0.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/annotate_app.py +2 -4
- spacr/core.py +32 -32
- spacr/foldseek.py +6 -6
- spacr/get_alfafold_structures.py +3 -3
- spacr/io.py +53 -50
- spacr/sim.py +24 -29
- spacr/utils.py +18 -78
- {spacr-0.0.70.dist-info → spacr-0.0.71.dist-info}/METADATA +10 -8
- {spacr-0.0.70.dist-info → spacr-0.0.71.dist-info}/RECORD +13 -16
- spacr/graph_learning_lap.py +0 -84
- spacr/train.py +0 -667
- spacr/umap.py +0 -0
- {spacr-0.0.70.dist-info → spacr-0.0.71.dist-info}/LICENSE +0 -0
- {spacr-0.0.70.dist-info → spacr-0.0.71.dist-info}/WHEEL +0 -0
- {spacr-0.0.70.dist-info → spacr-0.0.71.dist-info}/entry_points.txt +0 -0
- {spacr-0.0.70.dist-info → spacr-0.0.71.dist-info}/top_level.txt +0 -0
spacr/annotate_app.py
CHANGED
@@ -126,12 +126,10 @@ class ImageApp:
|
|
126
126
|
Normalize the pixel values of an image to the range [0, 255].
|
127
127
|
|
128
128
|
Parameters:
|
129
|
-
- img: PIL.Image.Image
|
130
|
-
The input image to be normalized.
|
129
|
+
- img: PIL.Image.Image. The input image to be normalized.
|
131
130
|
|
132
131
|
Returns:
|
133
|
-
- PIL.Image.Image
|
134
|
-
The normalized image.
|
132
|
+
- PIL.Image.Image. The normalized image.
|
135
133
|
"""
|
136
134
|
img_array = np.array(img)
|
137
135
|
img_array = ((img_array - img_array.min()) * (1/(img_array.max() - img_array.min()) * 255)).astype('uint8')
|
spacr/core.py
CHANGED
@@ -3118,29 +3118,29 @@ def generate_image_umap(settings={}):
|
|
3118
3118
|
|
3119
3119
|
Parameters:
|
3120
3120
|
settings (dict): Dictionary containing the following keys:
|
3121
|
-
|
3122
|
-
|
3123
|
-
|
3124
|
-
|
3125
|
-
|
3126
|
-
|
3127
|
-
|
3128
|
-
|
3129
|
-
|
3130
|
-
|
3131
|
-
|
3132
|
-
|
3133
|
-
|
3134
|
-
|
3135
|
-
|
3136
|
-
|
3137
|
-
|
3138
|
-
|
3139
|
-
|
3140
|
-
|
3141
|
-
|
3142
|
-
|
3143
|
-
|
3121
|
+
src (str): Source directory containing the data.
|
3122
|
+
row_limit (int): Limit the number of rows to process.
|
3123
|
+
tables (list): List of table names to read from the database.
|
3124
|
+
visualize (str): Visualization type.
|
3125
|
+
image_nr (int): Number of images to display.
|
3126
|
+
dot_size (int): Size of dots in the scatter plot.
|
3127
|
+
n_neighbors (int): Number of neighbors for UMAP.
|
3128
|
+
figuresize (int): Size of the figure.
|
3129
|
+
black_background (bool): Whether to use a black background.
|
3130
|
+
remove_image_canvas (bool): Whether to remove the image canvas.
|
3131
|
+
plot_outlines (bool): Whether to plot outlines.
|
3132
|
+
plot_points (bool): Whether to plot points.
|
3133
|
+
smooth_lines (bool): Whether to smooth lines.
|
3134
|
+
verbose (bool): Whether to print verbose output.
|
3135
|
+
embedding_by_controls (bool): Whether to use embedding from controls.
|
3136
|
+
col_to_compare (str): Column to compare for control-based embedding.
|
3137
|
+
pos (str): Positive control value.
|
3138
|
+
neg (str): Negative control value.
|
3139
|
+
clustering (str): Clustering method ('DBSCAN' or 'KMeans').
|
3140
|
+
exclude (list): List of columns to exclude from the analysis.
|
3141
|
+
plot_images (bool): Whether to plot images.
|
3142
|
+
reduction_method (str): Dimensionality reduction method ('UMAP' or 'tSNE').
|
3143
|
+
save_figure (bool): Whether to save the figure as a PDF.
|
3144
3144
|
|
3145
3145
|
Returns:
|
3146
3146
|
pd.DataFrame: DataFrame with the original data and an additional column 'cluster' containing the cluster identity.
|
@@ -3311,15 +3311,15 @@ def reducer_hyperparameter_search(settings={}, reduction_params=None, dbscan_par
|
|
3311
3311
|
|
3312
3312
|
Parameters:
|
3313
3313
|
settings (dict): Dictionary containing the following keys:
|
3314
|
-
|
3315
|
-
|
3316
|
-
|
3317
|
-
|
3318
|
-
|
3319
|
-
|
3320
|
-
|
3321
|
-
|
3322
|
-
|
3314
|
+
src (str): Source directory containing the data.
|
3315
|
+
row_limit (int): Limit the number of rows to process.
|
3316
|
+
tables (list): List of table names to read from the database.
|
3317
|
+
filter_by (str): Column to filter the data.
|
3318
|
+
sample_size (int): Number of samples to use for the hyperparameter search.
|
3319
|
+
remove_highly_correlated (bool): Whether to remove highly correlated columns.
|
3320
|
+
log_data (bool): Whether to log transform the data.
|
3321
|
+
verbose (bool): Whether to print verbose output.
|
3322
|
+
reduction_method (str): Dimensionality reduction method ('UMAP' or 'tSNE').
|
3323
3323
|
reduction_params (list): List of dictionaries containing hyperparameters to test for the reduction method.
|
3324
3324
|
dbscan_params (list): List of dictionaries containing DBSCAN hyperparameters to test.
|
3325
3325
|
kmeans_params (list): List of dictionaries containing KMeans hyperparameters to test.
|
spacr/foldseek.py
CHANGED
@@ -762,18 +762,18 @@ def analyze_results(foldseek_csv_path, base_dir):
|
|
762
762
|
#display(functional_data_df)
|
763
763
|
|
764
764
|
# Set up directories
|
765
|
-
structure_fldr_path = "/home/carruthers/Downloads/ME49_proteome/cif"
|
766
|
-
base_dir='/home/carruthers/foldseek/me49'
|
765
|
+
#structure_fldr_path = "/home/carruthers/Downloads/ME49_proteome/cif"
|
766
|
+
#base_dir='/home/carruthers/foldseek/me49'
|
767
767
|
|
768
|
-
align_to_database(structure_fldr_path, base_dir, cores=25)
|
768
|
+
#align_to_database(structure_fldr_path, base_dir, cores=25)
|
769
769
|
#foldseek_csv_path = f'{base_dir}/results/pdb/aln_tmscore.csv'
|
770
770
|
#analyze_results(foldseek_csv_path, base_dir)
|
771
771
|
|
772
772
|
# Set up directories
|
773
|
-
structure_fldr_path = "/home/carruthers/Downloads/GT1_proteome/cif"
|
774
|
-
base_dir='/home/carruthers/foldseek/gt1'
|
773
|
+
#structure_fldr_path = "/home/carruthers/Downloads/GT1_proteome/cif"
|
774
|
+
#base_dir='/home/carruthers/foldseek/gt1'
|
775
775
|
|
776
|
-
align_to_database(structure_fldr_path, base_dir, cores=25)
|
776
|
+
#align_to_database(structure_fldr_path, base_dir, cores=25)
|
777
777
|
#foldseek_csv_path = f'{base_dir}/results/pdb/aln_tmscore.csv'
|
778
778
|
#analyze_results(foldseek_csv_path, base_dir)
|
779
779
|
|
spacr/get_alfafold_structures.py
CHANGED
@@ -67,6 +67,6 @@ def download_alphafold_structures(tsv_location, dst, version="4"):
|
|
67
67
|
print(f"Failed download entries saved to: {os.path.join(dst, 'failed_downloads.csv')}")
|
68
68
|
|
69
69
|
# Example usage:
|
70
|
-
tsv_location = '/home/carruthers/Downloads/GT1_proteome/GT1_proteins_uniprot.tsv' # Replace with the path to your TSV file containing a list of UniProt entries
|
71
|
-
dst_folder = '/home/carruthers/Downloads/GT1_proteome' # Replace with your destination folder
|
72
|
-
download_alphafold_structures(tsv_location, dst_folder)
|
70
|
+
#tsv_location = '/home/carruthers/Downloads/GT1_proteome/GT1_proteins_uniprot.tsv' # Replace with the path to your TSV file containing a list of UniProt entries
|
71
|
+
#dst_folder = '/home/carruthers/Downloads/GT1_proteome' # Replace with your destination folder
|
72
|
+
#download_alphafold_structures(tsv_location, dst_folder)
|
spacr/io.py
CHANGED
@@ -255,31 +255,24 @@ class CombinedDataset(Dataset):
|
|
255
255
|
|
256
256
|
class NoClassDataset(Dataset):
|
257
257
|
"""
|
258
|
-
A custom dataset class for handling
|
259
|
-
|
258
|
+
A custom dataset class for handling image data without class labels.
|
259
|
+
|
260
260
|
Args:
|
261
|
-
data_dir (str): The directory path where the
|
262
|
-
transform (callable, optional): A function/transform
|
261
|
+
data_dir (str): The directory path where the image files are located.
|
262
|
+
transform (callable, optional): A function/transform to apply to the image data. Default is None.
|
263
263
|
shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
|
264
264
|
load_to_memory (bool, optional): Whether to load all images into memory. Default is False.
|
265
|
-
|
265
|
+
|
266
266
|
Attributes:
|
267
|
-
data_dir (str): The directory path where the
|
268
|
-
transform (callable): A function/transform
|
267
|
+
data_dir (str): The directory path where the image files are located.
|
268
|
+
transform (callable): A function/transform to apply to the image data.
|
269
269
|
shuffle (bool): Whether to shuffle the dataset.
|
270
270
|
load_to_memory (bool): Whether to load all images into memory.
|
271
|
-
filenames (list):
|
272
|
-
images (list):
|
273
|
-
|
274
|
-
Methods:
|
275
|
-
load_image: Loads an image from the given file path.
|
276
|
-
__len__: Returns the number of images in the dataset.
|
277
|
-
shuffle_dataset: Shuffles the dataset.
|
278
|
-
__getitem__: Retrieves an image and its corresponding file path from the dataset.
|
279
|
-
|
271
|
+
filenames (list): A list of file paths for the image files.
|
272
|
+
images (list): A list of loaded images (if load_to_memory is True).
|
280
273
|
"""
|
281
|
-
|
282
|
-
def
|
274
|
+
|
275
|
+
def __init__(self, data_dir, transform=None, shuffle=True, load_to_memory=False):
|
283
276
|
self.data_dir = data_dir
|
284
277
|
self.transform = transform
|
285
278
|
self.shuffle = shuffle
|
@@ -289,16 +282,47 @@ class NoClassDataset(Dataset):
|
|
289
282
|
self.shuffle_dataset()
|
290
283
|
if self.load_to_memory:
|
291
284
|
self.images = [self.load_image(f) for f in self.filenames]
|
285
|
+
|
292
286
|
#@lru_cache(maxsize=None)
|
293
287
|
def load_image(self, img_path):
|
288
|
+
"""
|
289
|
+
Load an image from the given file path.
|
290
|
+
|
291
|
+
Args:
|
292
|
+
img_path (str): The file path of the image.
|
293
|
+
|
294
|
+
Returns:
|
295
|
+
PIL.Image: The loaded image.
|
296
|
+
"""
|
294
297
|
img = Image.open(img_path).convert('RGB')
|
295
298
|
return img
|
296
|
-
|
299
|
+
|
300
|
+
def __len__(self):
|
301
|
+
"""
|
302
|
+
Get the total number of images in the dataset.
|
303
|
+
|
304
|
+
Returns:
|
305
|
+
int: The number of images in the dataset.
|
306
|
+
"""
|
297
307
|
return len(self.filenames)
|
308
|
+
|
298
309
|
def shuffle_dataset(self):
|
310
|
+
"""
|
311
|
+
Shuffle the dataset.
|
312
|
+
"""
|
299
313
|
if self.shuffle:
|
300
314
|
random.shuffle(self.filenames)
|
301
|
-
|
315
|
+
|
316
|
+
def __getitem__(self, index):
|
317
|
+
"""
|
318
|
+
Get the image and its corresponding filename at the given index.
|
319
|
+
|
320
|
+
Args:
|
321
|
+
index (int): The index of the image in the dataset.
|
322
|
+
|
323
|
+
Returns:
|
324
|
+
tuple: A tuple containing the image and its filename.
|
325
|
+
"""
|
302
326
|
if self.load_to_memory:
|
303
327
|
img = self.images[index]
|
304
328
|
else:
|
@@ -374,32 +398,7 @@ class MyDataset(Dataset):
|
|
374
398
|
return img, label, filename
|
375
399
|
|
376
400
|
class NoClassDataset(Dataset):
|
377
|
-
|
378
|
-
A custom dataset class for handling images without class labels.
|
379
|
-
|
380
|
-
Args:
|
381
|
-
data_dir (str): The directory path where the images are stored.
|
382
|
-
transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
|
383
|
-
shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
|
384
|
-
load_to_memory (bool, optional): Whether to load all images into memory. Default is False.
|
385
|
-
|
386
|
-
Attributes:
|
387
|
-
data_dir (str): The directory path where the images are stored.
|
388
|
-
transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
|
389
|
-
shuffle (bool): Whether to shuffle the dataset.
|
390
|
-
load_to_memory (bool): Whether to load all images into memory.
|
391
|
-
filenames (list): List of file paths of the images.
|
392
|
-
images (list): List of loaded images (if load_to_memory is True).
|
393
|
-
|
394
|
-
Methods:
|
395
|
-
load_image: Load an image from the given file path.
|
396
|
-
__len__: Get the length of the dataset.
|
397
|
-
shuffle_dataset: Shuffle the dataset.
|
398
|
-
__getitem__: Get an item (image and its filename) from the dataset.
|
399
|
-
|
400
|
-
"""
|
401
|
-
|
402
|
-
def _init__(self, data_dir, transform=None, shuffle=True, load_to_memory=False):
|
401
|
+
def __init__(self, data_dir, transform=None, shuffle=True, load_to_memory=False):
|
403
402
|
self.data_dir = data_dir
|
404
403
|
self.transform = transform
|
405
404
|
self.shuffle = shuffle
|
@@ -409,16 +408,20 @@ class NoClassDataset(Dataset):
|
|
409
408
|
self.shuffle_dataset()
|
410
409
|
if self.load_to_memory:
|
411
410
|
self.images = [self.load_image(f) for f in self.filenames]
|
412
|
-
|
411
|
+
|
413
412
|
def load_image(self, img_path):
|
414
413
|
img = Image.open(img_path).convert('RGB')
|
415
414
|
return img
|
416
|
-
|
415
|
+
|
416
|
+
def __len__(self):
|
417
|
+
|
417
418
|
return len(self.filenames)
|
419
|
+
|
418
420
|
def shuffle_dataset(self):
|
419
421
|
if self.shuffle:
|
420
422
|
random.shuffle(self.filenames)
|
421
|
-
|
423
|
+
|
424
|
+
def __getitem__(self, index):
|
422
425
|
if self.load_to_memory:
|
423
426
|
img = self.images[index]
|
424
427
|
else:
|
@@ -427,8 +430,8 @@ class NoClassDataset(Dataset):
|
|
427
430
|
img = self.transform(img)
|
428
431
|
else:
|
429
432
|
img = ToTensor()(img)
|
430
|
-
# Return both the image and its filename
|
431
433
|
return img, self.filenames[index]
|
434
|
+
|
432
435
|
|
433
436
|
class TarImageDataset(Dataset):
|
434
437
|
def _init__(self, tar_path, transform=None):
|
spacr/sim.py
CHANGED
@@ -204,16 +204,12 @@ def power_law_dist_gen(df, avg, well_ineq_coeff):
|
|
204
204
|
Generate a power-law distribution for wells.
|
205
205
|
|
206
206
|
Parameters:
|
207
|
-
- df: DataFrame
|
208
|
-
|
209
|
-
-
|
210
|
-
The average value for the distribution.
|
211
|
-
- well_ineq_coeff: float
|
212
|
-
The inequality coefficient for the power-law distribution.
|
207
|
+
- df: DataFrame: The input DataFrame containing the wells.
|
208
|
+
- avg: float: The average value for the distribution.
|
209
|
+
- well_ineq_coeff: float: The inequality coefficient for the power-law distribution.
|
213
210
|
|
214
211
|
Returns:
|
215
|
-
- dist: ndarray
|
216
|
-
The generated power-law distribution for the wells.
|
212
|
+
- dist: ndarray: The generated power-law distribution for the wells.
|
217
213
|
"""
|
218
214
|
# Generate a power-law distribution for wells
|
219
215
|
distribution = generate_power_law_distribution(len(df), well_ineq_coeff)
|
@@ -405,8 +401,7 @@ def compute_roc_auc(cell_scores):
|
|
405
401
|
- cell_scores (DataFrame): DataFrame containing cell scores with columns 'is_active' and 'score'.
|
406
402
|
|
407
403
|
Returns:
|
408
|
-
- cell_roc_dict (dict): Dictionary containing the ROC curve information, including the threshold, true positive rate (TPR),
|
409
|
-
false positive rate (FPR), and ROC AUC.
|
404
|
+
- cell_roc_dict (dict): Dictionary containing the ROC curve information, including the threshold, true positive rate (TPR), false positive rate (FPR), and ROC AUC.
|
410
405
|
|
411
406
|
"""
|
412
407
|
fpr, tpr, thresh = roc_curve(cell_scores['is_active'], cell_scores['score'], pos_label=1)
|
@@ -567,11 +562,11 @@ def regression_roc_auc(results_df, active_gene_list, control_gene_list, alpha =
|
|
567
562
|
|
568
563
|
Returns:
|
569
564
|
tuple: A tuple containing the following:
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
565
|
+
- results_df (DataFrame): Updated DataFrame with additional columns.
|
566
|
+
- reg_roc_dict_df (DataFrame): DataFrame containing regression ROC curve data.
|
567
|
+
- reg_pr_dict_df (DataFrame): DataFrame containing precision-recall curve data.
|
568
|
+
- reg_cm (ndarray): Confusion matrix.
|
569
|
+
- sim_stats (DataFrame): DataFrame containing simulation statistics.
|
575
570
|
"""
|
576
571
|
results_df = results_df.rename(columns={"P>|t|": "p"})
|
577
572
|
|
@@ -737,20 +732,20 @@ def run_simulation(settings):
|
|
737
732
|
|
738
733
|
Returns:
|
739
734
|
tuple: A tuple containing the simulation results and distances.
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
735
|
+
- cell_scores (DataFrame): Scores for each cell.
|
736
|
+
- cell_roc_dict_df (DataFrame): ROC AUC scores for each cell.
|
737
|
+
- cell_pr_dict_df (DataFrame): Precision-Recall AUC scores for each cell.
|
738
|
+
- cell_cm (DataFrame): Confusion matrix for each cell.
|
739
|
+
- well_score (DataFrame): Scores for each well.
|
740
|
+
- gene_fraction_map (DataFrame): Fraction of genes for each well.
|
741
|
+
- metadata (DataFrame): Metadata for each well.
|
742
|
+
- results_df (DataFrame): Results of the regression analysis.
|
743
|
+
- reg_roc_dict_df (DataFrame): ROC AUC scores for each gene.
|
744
|
+
- reg_pr_dict_df (DataFrame): Precision-Recall AUC scores for each gene.
|
745
|
+
- reg_cm (DataFrame): Confusion matrix for each gene.
|
746
|
+
- sim_stats (dict): Additional simulation statistics.
|
747
|
+
- genes_per_well_df (DataFrame): Number of genes per well.
|
748
|
+
- wells_per_gene_df (DataFrame): Number of wells per gene.
|
754
749
|
dists (list): List of distances.
|
755
750
|
"""
|
756
751
|
#try:
|
spacr/utils.py
CHANGED
@@ -370,12 +370,10 @@ def mask_object_count(mask):
|
|
370
370
|
Counts the number of objects in a given mask.
|
371
371
|
|
372
372
|
Parameters:
|
373
|
-
- mask: numpy.ndarray
|
374
|
-
The mask containing object labels.
|
373
|
+
- mask: numpy.ndarray. The mask containing object labels.
|
375
374
|
|
376
375
|
Returns:
|
377
|
-
- int
|
378
|
-
The number of objects in the mask.
|
376
|
+
- int. The number of objects in the mask.
|
379
377
|
"""
|
380
378
|
unique_labels = np.unique(mask)
|
381
379
|
num_objects = len(unique_labels[unique_labels!=0])
|
@@ -578,15 +576,15 @@ def normalize_to_dtype(array, p1=2, p2=98):
|
|
578
576
|
|
579
577
|
Parameters:
|
580
578
|
- array: numpy array
|
581
|
-
|
579
|
+
The input stack to be normalized.
|
582
580
|
- p1: int, optional
|
583
|
-
|
581
|
+
The lower percentile value for normalization. Default is 2.
|
584
582
|
- p2: int, optional
|
585
|
-
|
583
|
+
The upper percentile value for normalization. Default is 98.
|
586
584
|
|
587
585
|
Returns:
|
588
586
|
- new_stack: numpy array
|
589
|
-
|
587
|
+
The normalized stack with the same shape as the input stack.
|
590
588
|
"""
|
591
589
|
nimg = array.shape[2]
|
592
590
|
new_stack = np.empty_like(array, dtype=np.float32)
|
@@ -1485,52 +1483,18 @@ class SpatialAttention(nn.Module):
|
|
1485
1483
|
|
1486
1484
|
# Multi-Scale Block with Attention
|
1487
1485
|
class MultiScaleBlockWithAttention(nn.Module):
|
1488
|
-
"""
|
1489
|
-
Multi-scale block with attention module.
|
1490
|
-
|
1491
|
-
Args:
|
1492
|
-
in_channels (int): Number of input channels.
|
1493
|
-
out_channels (int): Number of output channels.
|
1494
|
-
|
1495
|
-
Attributes:
|
1496
|
-
dilated_conv1 (nn.Conv2d): Dilated convolution layer.
|
1497
|
-
spatial_attention (nn.Conv2d): Spatial attention layer.
|
1498
|
-
|
1499
|
-
Methods:
|
1500
|
-
custom_forward: Custom forward method for the module.
|
1501
|
-
forward: Forward method for the module.
|
1502
|
-
"""
|
1503
|
-
|
1504
1486
|
def __init__(self, in_channels, out_channels):
|
1505
1487
|
super(MultiScaleBlockWithAttention, self).__init__()
|
1506
1488
|
self.dilated_conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, dilation=1, padding=1)
|
1507
1489
|
self.spatial_attention = nn.Conv2d(out_channels, out_channels, kernel_size=1)
|
1508
1490
|
|
1509
1491
|
def custom_forward(self, x):
|
1510
|
-
"""
|
1511
|
-
Custom forward method for the module.
|
1512
|
-
|
1513
|
-
Args:
|
1514
|
-
x (torch.Tensor): Input tensor.
|
1515
|
-
|
1516
|
-
Returns:
|
1517
|
-
torch.Tensor: Output tensor.
|
1518
|
-
"""
|
1519
1492
|
x1 = F.relu(self.dilated_conv1(x), inplace=True)
|
1520
1493
|
x = self.spatial_attention(x1)
|
1521
1494
|
return x
|
1522
1495
|
|
1523
1496
|
def forward(self, x):
|
1524
|
-
|
1525
|
-
Forward method for the module.
|
1526
|
-
|
1527
|
-
Args:
|
1528
|
-
x (torch.Tensor): Input tensor.
|
1529
|
-
|
1530
|
-
Returns:
|
1531
|
-
torch.Tensor: Output tensor.
|
1532
|
-
"""
|
1533
|
-
return checkpoint(self.custom_forward, x)
|
1497
|
+
return self.custom_forward(x)
|
1534
1498
|
|
1535
1499
|
# Final Classifier
|
1536
1500
|
class CustomCellClassifier(nn.Module):
|
@@ -2258,25 +2222,6 @@ def MLR(merged_df, refine_model):
|
|
2258
2222
|
|
2259
2223
|
return max_effects, max_effects_pvalues, model, df
|
2260
2224
|
|
2261
|
-
#def normalize_to_dtype(array, q1=2, q2=98, percentiles=None):
|
2262
|
-
# if len(array.shape) == 2:
|
2263
|
-
# array = np.expand_dims(array, axis=-1)
|
2264
|
-
# num_channels = array.shape[-1]
|
2265
|
-
# new_stack = np.empty_like(array)
|
2266
|
-
# for channel in range(num_channels):
|
2267
|
-
# img = array[..., channel]
|
2268
|
-
# non_zero_img = img[img > 0]
|
2269
|
-
# if non_zero_img.size > 0:
|
2270
|
-
# img_min = np.percentile(non_zero_img, q1)
|
2271
|
-
# img_max = np.percentile(non_zero_img, q2)
|
2272
|
-
# else:
|
2273
|
-
# img_min, img_max = (percentiles[channel] if percentiles and channel < len(percentiles)
|
2274
|
-
# else (img.min(), img.max()))
|
2275
|
-
# new_stack[..., channel] = rescale_intensity(img, in_range=(img_min, img_max), out_range='dtype')
|
2276
|
-
# if new_stack.shape[-1] == 1:
|
2277
|
-
# new_stack = np.squeeze(new_stack, axis=-1)
|
2278
|
-
# return new_stack
|
2279
|
-
|
2280
2225
|
def get_files_from_dir(dir_path, file_extension="*"):
|
2281
2226
|
return glob(os.path.join(dir_path, file_extension))
|
2282
2227
|
|
@@ -3875,19 +3820,19 @@ def preprocess_data(df, filter_by, remove_highly_correlated, log_data, exclude):
|
|
3875
3820
|
applying log transformation, filling NaN values, and scaling the numeric data.
|
3876
3821
|
|
3877
3822
|
Args:
|
3878
|
-
|
3879
|
-
|
3880
|
-
|
3881
|
-
|
3882
|
-
|
3883
|
-
|
3884
|
-
|
3823
|
+
df (pandas.DataFrame): The input dataframe.
|
3824
|
+
filter_by (str or None): The channel of interest to filter the dataframe by.
|
3825
|
+
remove_highly_correlated (bool or float): Whether to remove highly correlated columns.
|
3826
|
+
If a float is provided, it represents the correlation threshold.
|
3827
|
+
log_data (bool): Whether to apply log transformation to the numeric data.
|
3828
|
+
exclude (list or None): List of features to exclude from the filtering process.
|
3829
|
+
verbose (bool): Whether to print verbose output during preprocessing.
|
3885
3830
|
|
3886
3831
|
Returns:
|
3887
|
-
|
3832
|
+
numpy.ndarray: The preprocessed numeric data.
|
3888
3833
|
|
3889
3834
|
Raises:
|
3890
|
-
|
3835
|
+
ValueError: If no numeric columns are available after filtering.
|
3891
3836
|
|
3892
3837
|
"""
|
3893
3838
|
# Apply filtering based on the `filter_by` parameter
|
@@ -3927,13 +3872,8 @@ def filter_dataframe_features(df, channel_of_interest, exclude=None):
|
|
3927
3872
|
|
3928
3873
|
Parameters:
|
3929
3874
|
- df (pandas.DataFrame): The input dataframe to be filtered.
|
3930
|
-
- channel_of_interest (str, int, list, None): The channel(s) of interest to filter the dataframe.
|
3931
|
-
|
3932
|
-
If an integer, only the specified channel is included. If a list, only the specified channels are included.
|
3933
|
-
If a string, only the specified channel is included.
|
3934
|
-
- exclude (str, list, None): The feature(s) to exclude from the filtered dataframe.
|
3935
|
-
If None, no features are excluded. If a string, the specified feature is excluded.
|
3936
|
-
If a list, the specified features are excluded.
|
3875
|
+
- channel_of_interest (str, int, list, None): The channel(s) of interest to filter the dataframe. If None, no filtering is applied. If 'morphology', only morphology features are included. If an integer, only the specified channel is included. If a list, only the specified channels are included. If a string, only the specified channel is included.
|
3876
|
+
- exclude (str, list, None): The feature(s) to exclude from the filtered dataframe. If None, no features are excluded. If a string, the specified feature is excluded. If a list, the specified features are excluded.
|
3937
3877
|
|
3938
3878
|
Returns:
|
3939
3879
|
- filtered_df (pandas.DataFrame): The filtered dataframe based on the specified parameters.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: spacr
|
3
|
-
Version: 0.0.70
|
3
|
+
Version: 0.0.71
|
4
4
|
Summary: Spatial phenotype analysis of crisp screens (SpaCr)
|
5
5
|
Home-page: https://github.com/EinarOlafsson/spacr
|
6
6
|
Author: Einar Birnir Olafsson
|
@@ -9,7 +9,7 @@ Classifier: Programming Language :: Python :: 3
|
|
9
9
|
Classifier: License :: OSI Approved :: MIT License
|
10
10
|
Classifier: Operating System :: OS Independent
|
11
11
|
License-File: LICENSE
|
12
|
-
Requires-Dist: dgl
|
12
|
+
Requires-Dist: dgl ==0.9.1
|
13
13
|
Requires-Dist: torch <3.0,>=2.2.1
|
14
14
|
Requires-Dist: torchvision <1.0,>=0.17.1
|
15
15
|
Requires-Dist: torch-geometric <3.0,>=2.5.1
|
@@ -46,6 +46,8 @@ Requires-Dist: opencv-python ; extra == 'full'
|
|
46
46
|
Provides-Extra: headless
|
47
47
|
Requires-Dist: opencv-python-headless ; extra == 'headless'
|
48
48
|
|
49
|
+
.. |Documentation Status| image:: https://readthedocs.org/projects/spacr/badge/?version=latest
|
50
|
+
:target: https://spacr.readthedocs.io/en/latest/?badge=latest
|
49
51
|
.. |PyPI version| image:: https://badge.fury.io/py/spacr.svg
|
50
52
|
:target: https://badge.fury.io/py/spacr
|
51
53
|
.. |Python version| image:: https://img.shields.io/pypi/pyversions/spacr
|
@@ -55,25 +57,25 @@ Requires-Dist: opencv-python-headless ; extra == 'headless'
|
|
55
57
|
.. |repo size| image:: https://img.shields.io/github/repo-size/EinarOlafsson/spacr
|
56
58
|
:target: https://github.com/EinarOlafsson/spacr/
|
57
59
|
|
58
|
-
|PyPI version| |Python version| |Licence: GPL v3| |repo size|
|
60
|
+
|Documentation Status| |PyPI version| |Python version| |Licence: GPL v3| |repo size|
|
59
61
|
|
60
62
|
SpaCr
|
61
63
|
=====
|
62
64
|
|
63
|
-
Spatial phenotype analysis of CRISPR-Cas9 screens (SpaCr). The spatial organization of organelles and proteins within cells constitutes a key level of functional regulation. In the context of infectious disease, the spatial relationships between host cell structures and intracellular pathogens are critical to understand host clearance mechanisms and how pathogens evade them.
|
65
|
+
Spatial phenotype analysis of CRISPR-Cas9 screens (SpaCr). The spatial organization of organelles and proteins within cells constitutes a key level of functional regulation. In the context of infectious disease, the spatial relationships between host cell structures and intracellular pathogens are critical to understand host clearance mechanisms and how pathogens evade them. SpaCr is a Python-based software package for generating single-cell image data for deep-learning sub-cellular/cellular phenotypic classification from pooled genetic CRISPR-Cas9 screens. SpaCr provides a flexible toolset to extract single-cell images and measurements from high-content cell painting experiments, train deep-learning models to classify cellular/subcellular phenotypes, simulate, and analyze pooled CRISPR-Cas9 imaging screens.
|
64
66
|
|
65
67
|
Features
|
66
68
|
--------
|
67
69
|
|
68
70
|
- **Generate Masks:** Generate cellpose masks of cell, nuclei, and pathogen objects.
|
69
71
|
|
70
|
-
- **Object Measurements:** Measurements for each object including scikit-image-regionprops, intensity percentiles, shannon-entropy, pearsons and manders correlations, homogeneity and radial distribution. Measurements are saved to a SQL database in object
|
72
|
+
- **Object Measurements:** Measurements for each object including scikit-image-regionprops, intensity percentiles, shannon-entropy, pearsons and manders correlations, homogeneity, and radial distribution. Measurements are saved to a SQL database in object-level tables.
|
71
73
|
|
72
|
-
- **Crop Images:** Objects (e.g
|
74
|
+
- **Crop Images:** Objects (e.g., cells) can be saved as PNGs from the object area or bounding box area of each object. Object paths are saved in a SQL database that can be annotated and used to train CNNs/Transformer models for classification tasks.
|
73
75
|
|
74
76
|
- **Train CNNs or Transformers:** Train Torch Convolutional Neural Networks (CNNs) or Transformers to classify single object images. Train Torch models with IRM/ERM, checkpointing.
|
75
77
|
|
76
|
-
- **Manual Annotation:** Supports manual annotation of single
|
78
|
+
- **Manual Annotation:** Supports manual annotation of single-cell images and segmentation to refine training datasets for training CNNs/Transformers or cellpose, respectively.
|
77
79
|
|
78
80
|
- **Finetune Cellpose Models:** Adjust pre-existing Cellpose models to your specific dataset for improved performance.
|
79
81
|
|
@@ -93,7 +95,7 @@ Requires Tkinter for graphical user interface features.
|
|
93
95
|
Ubuntu
|
94
96
|
~~~~~~
|
95
97
|
|
96
|
-
Before installing
|
98
|
+
Before installing SpaCr, ensure Tkinter is installed:
|
97
99
|
|
98
100
|
(Tkinter is included with the standard Python installation on macOS and Windows)
|
99
101
|
|