spacr 0.3.42__py3-none-any.whl → 0.3.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/cellpose.py CHANGED
@@ -86,7 +86,6 @@ def identify_masks_finetune(settings):
86
86
  if normalize:
87
87
  images, _, image_names, _, orig_dims = _load_normalized_images_and_labels(image_files=image_files, label_files=None, channels=channels, percentiles=percentiles, circular=circular, invert=invert, visualize=verbose, remove_background=remove_background, background=background, Signal_to_noise=Signal_to_noise, target_height=target_height, target_width=target_width)
88
88
  images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
89
- #orig_dims = [(image.shape[0], image.shape[1]) for image in images]
90
89
  else:
91
90
  images, _, image_names, _ = _load_images_and_labels(image_files=image_files, label_files=None, circular=circular, invert=invert)
92
91
  images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
@@ -126,7 +125,6 @@ def identify_masks_finetune(settings):
126
125
  print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls)
127
126
  print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type="")
128
127
 
129
-
130
128
  if verbose:
131
129
  if resize:
132
130
  stack = resizescikit(stack, dims, preserve_range=True, anti_aliasing=False).astype(stack.dtype)
spacr/gui_core.py CHANGED
@@ -384,8 +384,8 @@ def import_settings(settings_type='mask'):
384
384
  from .gui_utils import convert_settings_dict_for_gui, hide_all_settings
385
385
  from .settings import generate_fields, set_default_settings_preprocess_generate_masks, get_measure_crop_settings, set_default_train_test_model
386
386
  from .settings import set_default_generate_barecode_mapping, set_default_umap_image_settings, get_analyze_recruitment_default_settings
387
- from .settings import get_default_generate_activation_map_settings
388
- #activation
387
+ from .settings import get_default_generate_activation_map_settings, get_analyze_plaque_settings
388
+
389
389
  def read_settings_from_csv(csv_file_path):
390
390
  settings = {}
391
391
  with open(csv_file_path, newline='') as csvfile:
@@ -428,7 +428,7 @@ def import_settings(settings_type='mask'):
428
428
  elif settings_type == 'activation':
429
429
  settings = get_default_generate_activation_map_settings(settings={})
430
430
  elif settings_type == 'analyze_plaques':
431
- settings = {}
431
+ settings = get_analyze_plaque_settings(settings={})
432
432
  elif settings_type == 'convert':
433
433
  settings = {}
434
434
  else:
@@ -443,7 +443,7 @@ def setup_settings_panel(vertical_container, settings_type='mask'):
443
443
  global vars_dict, scrollable_frame
444
444
  from .settings import get_identify_masks_finetune_default_settings, set_default_analyze_screen, set_default_settings_preprocess_generate_masks
445
445
  from .settings import get_measure_crop_settings, deep_spacr_defaults, set_default_generate_barecode_mapping, set_default_umap_image_settings
446
- from .settings import get_map_barcodes_default_settings, get_analyze_recruitment_default_settings, get_check_cellpose_models_default_settings
446
+ from .settings import get_map_barcodes_default_settings, get_analyze_recruitment_default_settings, get_check_cellpose_models_default_settings, get_analyze_plaque_settings
447
447
  from .settings import generate_fields, get_perform_regression_default_settings, get_train_cellpose_default_settings, get_default_generate_activation_map_settings
448
448
  from .gui_utils import convert_settings_dict_for_gui
449
449
  from .gui_elements import set_element_size
@@ -490,7 +490,7 @@ def setup_settings_panel(vertical_container, settings_type='mask'):
490
490
  elif settings_type == 'activation':
491
491
  settings = get_default_generate_activation_map_settings(settings={})
492
492
  elif settings_type == 'analyze_plaques':
493
- settings = {'src':'path to images'}
493
+ settings = get_analyze_plaque_settings(settings={})
494
494
  elif settings_type == 'convert':
495
495
  settings = {'src':'path to images'}
496
496
  else:
spacr/gui_utils.py CHANGED
@@ -380,7 +380,7 @@ def convert_settings_dict_for_gui(settings):
380
380
  variables = {}
381
381
  special_cases = {
382
382
  'metadata_type': ('combo', ['cellvoyager', 'cq1', 'nikon', 'zeis', 'custom'], 'cellvoyager'),
383
- 'channels': ('combo', ['[0,1,2,3]', '[0,1,2]', '[0,1]', '[0]'], '[0,1,2,3]'),
383
+ 'channels': ('combo', ['[0,1,2,3]', '[0,1,2]', '[0,1]', '[0]', '[0,0]'], '[0,1,2,3]'),
384
384
  'train_channels': ('combo', ["['r','g','b']", "['r','g']", "['r','b']", "['g','b']", "['r']", "['g']", "['b']"], "['r','g','b']"),
385
385
  'channel_dims': ('combo', ['[0,1,2,3]', '[0,1,2]', '[0,1]', '[0]'], '[0,1,2,3]'),
386
386
  'dataset_mode': ('combo', ['annotation', 'metadata', 'recruitment'], 'metadata'),
spacr/io.py CHANGED
@@ -191,107 +191,130 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
191
191
  print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {labels[0].shape}, image type: labels[0].shape')
192
192
  return images, labels, image_names, label_names
193
193
 
194
- def _load_normalized_images_and_labels_v1(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10):
194
+ def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None,
195
+ circular=False, invert=False, visualize=False,
196
+ remove_background=False, background=0, Signal_to_noise=10,
197
+ target_height=None, target_width=None):
195
198
 
196
- from .plot import normalize_and_visualize
199
+ from .plot import normalize_and_visualize, plot_resize
197
200
  from .utils import invert_image, apply_mask
201
+ from skimage.transform import resize as resizescikit
202
+
203
+ # Ensure percentiles are valid
204
+ if isinstance(percentiles, list) and len(percentiles) == 2:
205
+ try:
206
+ percentiles = [int(percentiles[0]), int(percentiles[1])]
207
+ except ValueError:
208
+ percentiles = None
209
+ else:
210
+ percentiles = None
198
211
 
199
- signal_thresholds = background*Signal_to_noise
212
+ signal_thresholds = float(background) * float(Signal_to_noise)
200
213
  lower_percentile = 2
201
214
 
202
- images = []
203
- labels = []
204
-
215
+ images, labels, orig_dims = [], [], []
205
216
  num_channels = 4
206
217
  percentiles_1 = [[] for _ in range(num_channels)]
207
218
  percentiles_99 = [[] for _ in range(num_channels)]
208
219
 
209
220
  image_names = [os.path.basename(f) for f in image_files]
210
-
221
+ image_dir = os.path.dirname(image_files[0])
222
+
211
223
  if label_files is not None:
212
224
  label_names = [os.path.basename(f) for f in label_files]
213
225
  label_dir = os.path.dirname(label_files[0])
226
+ else:
227
+ label_names, label_dir = [], None
214
228
 
215
- # Load images and check percentiles
216
- for i,img_file in enumerate(image_files):
229
+ # Load, normalize, and resize images
230
+ for i, img_file in enumerate(image_files):
217
231
  image = cellpose.io.imread(img_file)
232
+ orig_dims.append((image.shape[0], image.shape[1]))
233
+
218
234
  if invert:
219
235
  image = invert_image(image)
220
236
  if circular:
221
237
  image = apply_mask(image, output_value=0)
222
238
 
223
- # If specific channels are specified, select them
239
+ # Select specific channels if needed
224
240
  if channels is not None and image.ndim == 3:
225
241
  image = image[..., channels]
226
242
 
227
243
  if remove_background:
228
- image[image < background] = 0
229
-
244
+ image = np.where(image < background, 0, image)
245
+
230
246
  if image.ndim < 3:
231
247
  image = np.expand_dims(image, axis=-1)
232
-
233
- images.append(image)
248
+
249
+ # Calculate percentiles if not provided
234
250
  if percentiles is None:
235
251
  for c in range(image.shape[-1]):
236
252
  p1 = np.percentile(image[..., c], lower_percentile)
237
253
  percentiles_1[c].append(p1)
254
+
255
+ # Ensure `signal_thresholds` and `p` are floats for comparison
238
256
  for percentile in [98, 99, 99.9, 99.99, 99.999]:
239
257
  p = np.percentile(image[..., c], percentile)
240
- if p > signal_thresholds:
258
+ if float(p) > signal_thresholds:
241
259
  percentiles_99[c].append(p)
242
260
  break
243
-
244
- if not percentiles is None:
245
- normalized_images = []
246
- for image in images:
247
- normalized_image = np.zeros_like(image, dtype=np.float32)
248
- for c in range(image.shape[-1]):
249
- low_p = np.percentile(image[..., c], percentiles[0])
250
- high_p = np.percentile(image[..., c], percentiles[1])
251
- normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(low_p, high_p), out_range=(0, 1))
252
- normalized_images.append(normalized_image)
253
- if visualize:
254
- normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
255
-
261
+
262
+ # Resize image if required
263
+ if target_height and target_width:
264
+ image_shape = (target_height, target_width) if image.ndim == 2 else (target_height, target_width, image.shape[-1])
265
+ image = resizescikit(image, image_shape, preserve_range=True, anti_aliasing=True).astype(image.dtype)
266
+
267
+ images.append(image)
268
+
269
+ # Calculate average percentiles if needed
256
270
  if percentiles is None:
257
- # Calculate average percentiles for normalization
258
271
  avg_p1 = [np.mean(p) for p in percentiles_1]
259
- avg_p99 = [np.mean(p) if len(p) > 0 else np.mean(percentiles_1[i]) for i, p in enumerate(percentiles_99)]
272
+ avg_p99 = [np.mean(p) if p else avg_p1[i] for i, p in enumerate(percentiles_99)]
260
273
 
261
274
  print(f'Average 1st percentiles: {avg_p1}, Average 99th percentiles: {avg_p99}')
262
275
 
263
- normalized_images = []
264
- for image in images:
265
- normalized_image = np.zeros_like(image, dtype=np.float32)
266
- for c in range(image.shape[-1]):
267
- normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
268
- normalized_images.append(normalized_image)
269
- if visualize:
270
- normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
271
-
272
- if not image_files is None:
273
- image_dir = os.path.dirname(image_files[0])
276
+ normalized_images = [
277
+ np.stack([rescale_intensity(img[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
278
+ for c in range(img.shape[-1])], axis=-1) for img in images
279
+ ]
274
280
 
275
281
  else:
276
- image_dir = None
277
-
282
+ normalized_images = [
283
+ np.stack([rescale_intensity(img[..., c],
284
+ in_range=(np.percentile(img[..., c], percentiles[0]),
285
+ np.percentile(img[..., c], percentiles[1])),
286
+ out_range=(0, 1)) for c in range(img.shape[-1])], axis=-1)
287
+ for img in images
288
+ ]
289
+
290
+ # Load and resize labels if provided
278
291
  if label_files is not None:
279
- for lbl_file in label_files:
280
- labels.append(cellpose.io.imread(lbl_file))
281
- else:
282
- label_names = []
283
- label_dir = None
292
+ labels = [resizescikit(cellpose.io.imread(lbl_file),
293
+ (target_height, target_width) if target_height and target_width else orig_dims[i],
294
+ order=0, preserve_range=True, anti_aliasing=False).astype(np.uint8)
295
+ for i, lbl_file in enumerate(label_files)]
284
296
 
285
297
  print(f'Loaded and normalized {len(normalized_images)} images and {len(labels)} labels from {image_dir} and {label_dir}')
286
-
287
- return normalized_images, labels, image_names, label_names
288
298
 
289
- def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10, target_height=None, target_width=None):
299
+ if visualize and images and labels:
300
+ plot_resize(images, normalized_images, labels, labels)
301
+
302
+ return normalized_images, labels, image_names, label_names, orig_dims
303
+
304
+ def _load_normalized_images_and_labels_v1(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10, target_height=None, target_width=None):
290
305
 
291
306
  from .plot import normalize_and_visualize, plot_resize
292
307
  from .utils import invert_image, apply_mask
293
308
  from skimage.transform import resize as resizescikit
294
309
 
310
+ if isinstance(percentiles, list):
311
+ if len(percentiles) !=2:
312
+ percentiles = None
313
+ if not percentiles[0] is int:
314
+ percentiles = None
315
+ if not percentiles[1] is int:
316
+ percentiles = None
317
+
295
318
  signal_thresholds = background * Signal_to_noise
296
319
  lower_percentile = 2
297
320
 
spacr/plot.py CHANGED
@@ -1521,7 +1521,7 @@ def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True
1521
1521
  return fig
1522
1522
 
1523
1523
  def print_mask_and_flows(stack, mask, flows, overlay=False):
1524
- fig, axs = plt.subplots(1, 3, figsize=(30, 10)) # Adjust subplot layout
1524
+ fig, axs = plt.subplots(1, 3, figsize=(12, 4)) # Adjust subplot layout
1525
1525
 
1526
1526
  if stack.shape[-1] == 1:
1527
1527
  stack = np.squeeze(stack)
spacr/settings.py CHANGED
@@ -581,6 +581,7 @@ def get_check_cellpose_models_default_settings(settings):
581
581
  return settings
582
582
 
583
583
  def get_identify_masks_finetune_default_settings(settings):
584
+ settings.setdefault('src', 'path')
584
585
  settings.setdefault('model_name', 'cyto')
585
586
  settings.setdefault('custom_model', None)
586
587
  settings.setdefault('channels', [0,0])
@@ -690,6 +691,7 @@ expected_types = {
690
691
  "filter_min_max": (list, type(None)),
691
692
  "channel_dims": list,
692
693
  "backgrounds": list,
694
+ "background": str,
693
695
  "outline_thickness": int,
694
696
  "outline_color": str,
695
697
  "overlay_chans": list,
@@ -909,12 +911,12 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
909
911
  "Hyperparamiters (Embedding)": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
910
912
  "Hyperparamiters (Clustering)": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
911
913
  "Hyperparamiters (Regression)":["cov_type", "class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "random_row_column_effects", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable"],
912
- "Hyperparamiters (Activation)":["cam_type", "normalize", "overlay", "correlation", "target_layer", "normalize_input"],
914
+ "Hyperparamiters (Activation)":["cam_type", "overlay", "correlation", "target_layer", "normalize_input"],
913
915
  "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
914
916
  "Plot": ["plot", "plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
915
917
  "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
916
918
  "Timelapse": ["timelapse", "fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
917
- "Advanced": ["shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "uninfected", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
919
+ "Advanced": ["shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "uninfected", "background", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
918
920
  "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"]
919
921
  }
920
922
 
@@ -1367,4 +1369,31 @@ def get_default_generate_activation_map_settings(settings):
1367
1369
  settings.setdefault('manders_thresholds', [15,50, 75])
1368
1370
  settings.setdefault('n_jobs', None)
1369
1371
 
1372
+ return settings
1373
+
1374
+ def get_analyze_plaque_settings(settings):
1375
+ settings.setdefault('src', 'path')
1376
+ settings.setdefault('masks', True)
1377
+ settings.setdefault('model_name', 'plaque')
1378
+ settings.setdefault('custom_model', None)
1379
+ settings.setdefault('channels', [0,0])
1380
+ settings.setdefault('background', 200)
1381
+ settings.setdefault('remove_background', False)
1382
+ settings.setdefault('Signal_to_noise', 10)
1383
+ settings.setdefault('CP_prob', 0)
1384
+ settings.setdefault('diameter', 30)
1385
+ settings.setdefault('batch_size', 50)
1386
+ settings.setdefault('flow_threshold', 0.4)
1387
+ settings.setdefault('save', True)
1388
+ settings.setdefault('verbose', True)
1389
+ settings.setdefault('normalize', True)
1390
+ settings.setdefault('percentiles', None)
1391
+ settings.setdefault('circular', False)
1392
+ settings.setdefault('invert', False)
1393
+ settings.setdefault('resize', True)
1394
+ settings.setdefault('target_height', 1120)
1395
+ settings.setdefault('target_width', 1120)
1396
+ settings.setdefault('rescale', False)
1397
+ settings.setdefault('resample', False)
1398
+ settings.setdefault('grayscale', True)
1370
1399
  return settings
spacr/submodules.py CHANGED
@@ -8,6 +8,9 @@ from cellpose import models as cp_models
8
8
  from cellpose import train as train_cp
9
9
  from IPython.display import display
10
10
 
11
+ import matplotlib.pyplot as plt
12
+ from natsort import natsorted
13
+
11
14
  def analyze_recruitment(settings={}):
12
15
  """
13
16
  Analyze recruitment data by grouping the DataFrame by well coordinates and plotting controls and recruitment data.
@@ -122,7 +125,31 @@ def analyze_recruitment(settings={}):
122
125
 
123
126
  return [cells,wells]
124
127
 
125
- def analyze_plaques(folder):
128
+ def analyze_plaques(settings):
129
+
130
+ from .cellpose import identify_masks_finetune
131
+ from .settings import get_analyze_plaque_settings
132
+ from .utils import save_settings, download_models
133
+ from spacr import __file__ as spacr_path
134
+
135
+ download_models()
136
+ package_dir = os.path.dirname(spacr_path)
137
+ models_dir = os.path.join(package_dir, 'resources', 'models', 'cp')
138
+ model_path = os.path.join(models_dir, 'toxo_plaque_cyto_e25000_X1120_Y1120.CP_model')
139
+ settings['custom_model'] = model_path
140
+ print('custom_model',settings['custom_model'])
141
+
142
+ settings = get_analyze_plaque_settings(settings)
143
+ save_settings(settings, name='analyze_plaques', show=True)
144
+
145
+ if settings['masks']:
146
+ settings['dst'] = os.path.join(settings['src'], 'masks')
147
+ display(settings)
148
+ identify_masks_finetune(settings)
149
+ folder = settings['dst']
150
+ else:
151
+ folder = settings['src']
152
+
126
153
  summary_data = []
127
154
  details_data = []
128
155
  stats_data = []
@@ -346,4 +373,136 @@ def count_phenotypes(settings):
346
373
 
347
374
  pivot_df.to_csv(output_path)
348
375
 
349
- return
376
+ return
377
+
378
+ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={}, column='column', value='c3', plate='plate1', fraction_threshold=0.05):
379
+
380
+ def calculate_well_score_fractions(df, class_columns='cv_predictions'):
381
+ if all(col in df.columns for col in ['plate', 'row', 'column']):
382
+ df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
383
+ else:
384
+ raise ValueError("Cannot find 'plate', 'row', or 'column' in df.columns")
385
+ prc_summary = df.groupby(['plate', 'row', 'column', 'prc']).size().reset_index(name='total_rows')
386
+ well_counts = (df.groupby(['plate', 'row', 'column', 'prc', class_columns])
387
+ .size()
388
+ .unstack(fill_value=0)
389
+ .reset_index()
390
+ .rename(columns={0: 'class_0', 1: 'class_1'}))
391
+ summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row', 'column', 'prc'], how='left')
392
+ summary_df['class_0_fraction'] = summary_df['class_0'] / summary_df['total_rows']
393
+ summary_df['class_1_fraction'] = summary_df['class_1'] / summary_df['total_rows']
394
+ return summary_df
395
+
396
+ def plot_line(df, x_column, y_columns, group_column=None,
397
+ xlabel=None, ylabel=None, title=None, figsize=(10, 6),
398
+ save_path=None):
399
+ """
400
+ Create a line plot that can handle multiple y-columns, each becoming a separate line.
401
+ """
402
+ df = df.loc[natsorted(df.index, key=lambda x: df.loc[x, x_column])]
403
+
404
+ plt.figure(figsize=figsize)
405
+
406
+ if isinstance(y_columns, list):
407
+ for y_col in y_columns:
408
+ sns.lineplot(data=df, x=x_column, y=y_col, label=y_col, marker='o')
409
+ else:
410
+ sns.lineplot(data=df, x=x_column, y=y_columns, hue=group_column, marker='o')
411
+ plt.xlabel(xlabel if xlabel else x_column)
412
+ plt.ylabel(ylabel if ylabel else 'Value')
413
+ plt.title(title if title else f'Line Plot')
414
+ if group_column or isinstance(y_columns, list):
415
+ plt.legend(title='Legend')
416
+
417
+ plt.tight_layout()
418
+
419
+ if save_path:
420
+ plt.savefig(save_path, format='png', dpi=300, bbox_inches='tight')
421
+ print(f"Plot saved to {save_path}")
422
+ plt.show()
423
+
424
+ def calculate_grna_fraction_ratio(df, grna1='TGGT1_220950_1', grna2='TGGT1_233460_4'):
425
+ # Filter relevant grna_names within each prc and group them
426
+ grouped = df[df['grna_name'].isin([grna1, grna2])] \
427
+ .groupby(['prc', 'grna_name']) \
428
+ .agg({'fraction': 'sum', 'count': 'sum'}) \
429
+ .unstack(fill_value=0)
430
+ grouped.columns = ['_'.join(col).strip() for col in grouped.columns.values]
431
+ grouped['fraction_ratio'] = grouped[f'fraction_{grna1}'] / grouped[f'fraction_{grna2}']
432
+ grouped = grouped.assign(
433
+ fraction_ratio=lambda x: x['fraction_ratio'].replace([float('inf'), -float('inf')], 0)
434
+ ).fillna({'fraction_ratio': 0})
435
+ grouped = grouped.rename(columns={
436
+ f'count_{grna1}': f'{grna1}_count',
437
+ f'count_{grna2}': f'{grna2}_count'
438
+ })
439
+ result = grouped.reset_index()[['prc', f'{grna1}_count', f'{grna2}_count', 'fraction_ratio']]
440
+
441
+ result['total_reads'] = result[f'{grna1}_count'] + result[f'{grna2}_count']
442
+
443
+ result[f'{grna1}_fraction'] = result[f'{grna1}_count'] / result['total_reads']
444
+ result[f'{grna2}_fraction'] = result[f'{grna2}_count'] / result['total_reads']
445
+
446
+ return result
447
+
448
+ def calculate_well_read_fraction(df, count_column='count'):
449
+ if all(col in df.columns for col in ['plate', 'row', 'column']):
450
+ df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
451
+ else:
452
+ raise ValueError("Cannot find plate, row or column in df.columns")
453
+ grouped_df = df.groupby('prc')[count_column].sum().reset_index()
454
+ grouped_df = grouped_df.rename(columns={count_column: 'total_counts'})
455
+ df = pd.merge(df, grouped_df, on='prc')
456
+ df['fraction'] = df['count'] / df['total_counts']
457
+ return df
458
+
459
+ reads_df = pd.read_csv(reads_csv)
460
+ scores_df = pd.read_csv(scores_csv)
461
+
462
+ if plate != None:
463
+ reads_df['plate'] = plate
464
+ scores_df['plate'] = plate
465
+
466
+ if 'col' in reads_df.columns:
467
+ reads_df = reads_df.rename(columns={'col': 'column'})
468
+ if 'column_name' in reads_df.columns:
469
+ reads_df = reads_df.rename(columns={'column_name': 'column'})
470
+ if 'col' in scores_df.columns:
471
+ scores_df = scores_df.rename(columns={'col': 'column'})
472
+ if 'column_name' in scores_df.columns:
473
+ scores_df = scores_df.rename(columns={'column_name': 'column'})
474
+ if 'row_name' in reads_df.columns:
475
+ reads_df = reads_df.rename(columns={'row_name': 'row'})
476
+ if 'row_name' in scores_df.columns:
477
+ scores_df = scores_df.rename(columns={'row_name': 'row'})
478
+
479
+ reads_df = calculate_well_read_fraction(reads_df)
480
+ scores_df = calculate_well_score_fractions(scores_df)
481
+ reads_col_df = reads_df[reads_df[column]==value]
482
+ scores_col_df = scores_df[scores_df[column]==value]
483
+
484
+ #reads_col_df = reads_col_df[reads_col_df['fraction'] >= fraction_threshold]
485
+ reads_col_df = calculate_grna_fraction_ratio(reads_col_df, grna1='TGGT1_220950_1', grna2='TGGT1_233460_4')
486
+ df = pd.merge(reads_col_df, scores_col_df, on='prc')
487
+
488
+
489
+ # Convert the dictionary to a DataFrame and calculate fractions
490
+ df_emp = pd.DataFrame(
491
+ [(key, val[0], val[1], val[0] / (val[0] + val[1]), val[1] / (val[0] + val[1]))
492
+ for key, val in empirical_dict.items()],
493
+ columns=['key', 'value1', 'value2', 'fraction1', 'fraction2']
494
+ )
495
+
496
+ df = pd.merge(df, df_emp, left_on='row', right_on='key')
497
+ display(df)
498
+ y_columns = ['class_1_fraction', 'TGGT1_220950_1_fraction', 'fraction2']
499
+
500
+ plot_line(df, x_column='row', y_columns=y_columns, group_column=None,
501
+ xlabel=None, ylabel=None, title=None, figsize=(10, 6),
502
+ save_path=None)
503
+
504
+ y_columns = ['class_0_fraction', 'TGGT1_233460_4_fraction', 'fraction1']
505
+
506
+ plot_line(df, x_column='row', y_columns=y_columns, group_column=None,
507
+ xlabel=None, ylabel=None, title=None, figsize=(10, 6),
508
+ save_path=None)
spacr/utils.py CHANGED
@@ -64,6 +64,7 @@ from sklearn.decomposition import PCA
64
64
  from sklearn.ensemble import RandomForestClassifier
65
65
 
66
66
  from huggingface_hub import list_repo_files
67
+ from spacr import __file__ as spacr_path
67
68
 
68
69
  import umap.umap_ as umap
69
70
  #import umap
@@ -4963,7 +4964,71 @@ def map_condition(col_value, neg='c1', pos='c2', mix='c3'):
4963
4964
  else:
4964
4965
  return 'screen'
4965
4966
 
4966
- def download_models(repo_id="einarolafsson/models", local_dir=None, retries=5, delay=5):
4967
+ def download_models(repo_id="einarolafsson/models", retries=5, delay=5):
4968
+ """
4969
+ Downloads all model files from Hugging Face and stores them in the `resources/models` directory
4970
+ within the installed `spacr` package.
4971
+
4972
+ Args:
4973
+ repo_id (str): The repository ID on Hugging Face (default is 'einarolafsson/models').
4974
+ retries (int): Number of retry attempts in case of failure.
4975
+ delay (int): Delay in seconds between retries.
4976
+
4977
+ Returns:
4978
+ str: The local path to the downloaded models.
4979
+ """
4980
+ # Construct the path to the `resources/models` directory in the installed `spacr` package
4981
+ package_dir = os.path.dirname(spacr_path)
4982
+ local_dir = os.path.join(package_dir, 'resources', 'models')
4983
+
4984
+ # Create the local directory if it doesn't exist
4985
+ if not os.path.exists(local_dir):
4986
+ os.makedirs(local_dir)
4987
+ elif len(os.listdir(local_dir)) > 0:
4988
+ print(f"Models already downloaded to: {local_dir}")
4989
+ return local_dir
4990
+
4991
+ attempt = 0
4992
+ while attempt < retries:
4993
+ try:
4994
+ # List all files in the repo
4995
+ files = list_repo_files(repo_id, repo_type="dataset")
4996
+ print(f"Files in repository: {files}") # Debugging print to check file list
4997
+
4998
+ # Download each file
4999
+ for file_name in files:
5000
+ for download_attempt in range(retries):
5001
+ try:
5002
+ url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_name}?download=true"
5003
+ print(f"Downloading file from: {url}") # Debugging
5004
+
5005
+ response = requests.get(url, stream=True)
5006
+ print(f"HTTP response status: {response.status_code}") # Debugging
5007
+ response.raise_for_status()
5008
+
5009
+ # Save the file locally
5010
+ local_file_path = os.path.join(local_dir, os.path.basename(file_name))
5011
+ with open(local_file_path, 'wb') as file:
5012
+ for chunk in response.iter_content(chunk_size=8192):
5013
+ file.write(chunk)
5014
+ print(f"Downloaded model file: {file_name} to {local_file_path}")
5015
+ break # Exit the retry loop if successful
5016
+ except (requests.HTTPError, requests.Timeout) as e:
5017
+ print(f"Error downloading {file_name}: {e}. Retrying in {delay} seconds...")
5018
+ time.sleep(delay)
5019
+ else:
5020
+ raise Exception(f"Failed to download {file_name} after multiple attempts.")
5021
+
5022
+ return local_dir # Return the directory where models are saved
5023
+
5024
+ except (requests.HTTPError, requests.Timeout) as e:
5025
+ print(f"Error downloading files: {e}. Retrying in {delay} seconds...")
5026
+ attempt += 1
5027
+ time.sleep(delay)
5028
+
5029
+ raise Exception("Failed to download model files after multiple attempts.")
5030
+
5031
+ def download_models_v1(repo_id="einarolafsson/models", local_dir=None, retries=5, delay=5):
4967
5032
  """
4968
5033
  Downloads all model files from Hugging Face and stores them in the specified local directory.
4969
5034
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.3.42
3
+ Version: 0.3.43
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -7,27 +7,27 @@ spacr/app_mask.py,sha256=l-dBY8ftzCMdDe6-pXc2Nh_u-idNL9G7UOARiLJBtds,153
7
7
  spacr/app_measure.py,sha256=_K7APYIeOKpV6e_LcqabBjvEi7mfq9Fch8175x1x0k8,162
8
8
  spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
9
9
  spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
10
- spacr/cellpose.py,sha256=zv4BzhaP2O-mtQ-pUfYvpOyxgn1ke_bDWgdHD5UWm9I,13942
10
+ spacr/cellpose.py,sha256=KxgPAHEs4iLYZA-h_HBYnpSB_rSZKhEBZ6Fs0I9x5E0,13849
11
11
  spacr/core.py,sha256=dW9RrAKFLfVsFhX0-kaVMc2T7b47Ky0pTXK-CEVOeWQ,48235
12
12
  spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
13
13
  spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
14
- spacr/gui_core.py,sha256=LV_HX5zreu3Bye6sQFDbOuk8Dfj4StMoohy6hsrDEXA,41363
14
+ spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
15
15
  spacr/gui_elements.py,sha256=w-S1MZdyxt5O3DsNAHNNXy_WGfwBPg0NhwQtCsJeiao,137071
16
- spacr/gui_utils.py,sha256=7e9DsZIuV7-jh97kEf7v1In_cFzlFueV4SGcGYGpTxw,45454
17
- spacr/io.py,sha256=LN_gJq_oqjbf8y-lBtLLZtJi8DLbNdyoGEcBYyOjbhQ,143606
16
+ spacr/gui_utils.py,sha256=KDWDWsi7UdZVhXk1ZWGx3ZqJMIxCUm3lGfjrVhbk52s,45463
17
+ spacr/io.py,sha256=ahsUaDwvkCHxGu_uvhgNCGWiJL_-ze291rHHQvdrFXQ,144622
18
18
  spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
19
19
  spacr/measure.py,sha256=KdboGXoi85BO5-_6er7932FgjFI7G7tuaQDnWSiEuew,54817
20
20
  spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
21
21
  spacr/ml.py,sha256=vzuEnbQd96mn7T8h3GRsEDnpWSSpxd3ApGMXTiG6b2o,50507
22
22
  spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
23
- spacr/plot.py,sha256=TDGMwiIHjvk6v94WFlIvemU-6JfEik_GmSez51vyvCc,135869
23
+ spacr/plot.py,sha256=mqD0XyExAZ_qhnz71bLJo7nTVGod2eN8bJ_9sAV2eN8,135868
24
24
  spacr/sequencing.py,sha256=t18mgpK6rhWuB1LtFOsPxqgpFXxuUmrD06ecsaVQ0Gw,19655
25
- spacr/settings.py,sha256=x3zcOpVbsxGvq4neW-H08CxzNl8thacy4WOxcIG4TAc,76607
25
+ spacr/settings.py,sha256=VkCgZ8r30Q3VmTmYCf2_KRX3htqXR80osOjq37vLbwM,77770
26
26
  spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
27
- spacr/submodules.py,sha256=AB7s6-cULsaqz-haAaCtXfGEIi8uPZGT4xoCslUJC3Y,18391
27
+ spacr/submodules.py,sha256=QRzojeHMZ2iRskmU5D7Q9iu6U1wPTODRm55r30KLZyY,25653
28
28
  spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
29
29
  spacr/toxo.py,sha256=MVDfkfTl6fhbzg3izLWdtr2arARYIhI1TdScnHtPVqI,16770
30
- spacr/utils.py,sha256=Z8lmQJc8sdPvHi0ZmYOahuKtUmDcrYtRYlT4qNZORXU,216396
30
+ spacr/utils.py,sha256=yDxP8TslqLoKFpKyaCPDOulAitkDBR6MOwPG8FH8mYw,219417
31
31
  spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
32
32
  spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
33
33
  spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -150,9 +150,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
150
150
  spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
151
151
  spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
152
152
  spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
153
- spacr-0.3.42.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
154
- spacr-0.3.42.dist-info/METADATA,sha256=_nbP3IjQELrampyAYyt6hfrQBukDHuhlS7CApsMPsQ0,5949
155
- spacr-0.3.42.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
156
- spacr-0.3.42.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
157
- spacr-0.3.42.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
158
- spacr-0.3.42.dist-info/RECORD,,
153
+ spacr-0.3.43.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
154
+ spacr-0.3.43.dist-info/METADATA,sha256=NBeaa28RVdVWa4lgSnWFKTuqVS_hJzUtg3sdMjgmf40,5949
155
+ spacr-0.3.43.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
156
+ spacr-0.3.43.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
157
+ spacr-0.3.43.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
158
+ spacr-0.3.43.dist-info/RECORD,,
File without changes